langchainrb 0.13.1 → 0.13.3
- checksums.yaml +4 -4
- data/CHANGELOG.md +11 -0
- data/README.md +6 -3
- data/lib/langchain/assistants/assistant.rb +44 -6
- data/lib/langchain/assistants/messages/anthropic_message.rb +75 -0
- data/lib/langchain/llm/anthropic.rb +8 -0
- data/lib/langchain/llm/aws_bedrock.rb +62 -9
- data/lib/langchain/llm/google_gemini.rb +31 -0
- data/lib/langchain/llm/google_vertex_ai.rb +4 -1
- data/lib/langchain/llm/hugging_face.rb +19 -8
- data/lib/langchain/llm/ollama.rb +63 -30
- data/lib/langchain/llm/openai.rb +21 -10
- data/lib/langchain/llm/response/anthropic_response.rb +11 -1
- data/lib/langchain/llm/response/google_gemini_response.rb +5 -1
- data/lib/langchain/llm/response/ollama_response.rb +12 -8
- data/lib/langchain/processors/xls.rb +27 -0
- data/lib/langchain/tool/base.rb +12 -0
- data/lib/langchain/tool/news_retriever/news_retriever.json +2 -1
- data/lib/langchain/tool/tavily/tavily.json +54 -0
- data/lib/langchain/tool/tavily/tavily.rb +62 -0
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +1 -0
- metadata +43 -39
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: d39fd55aeaa36e9d2a0009dac4808743d97941bd139bc9373eafb153cfb7854e
+  data.tar.gz: ddf8757341169f2e38bb076a9dfb5f5328a0de4e01ce58ec5b3263fdd3c47105
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 243d774a05c2d5bfa27c8f98f51adac3ff13252cb108de085d8ac21b65977b8e8c7f9b78f4a58872b2a299c1e615b9fa6852cf27b564b567f6fa3d15dea2cf26
+  data.tar.gz: 6665f726069148c0adc24947d570b67d2f56f00ad5ffeeb6653fe745401f14768b324c67711cdaf023e782a3fa61c2787050ed2087db6ca34410a65a5e02a8ec
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,16 @@
 ## [Unreleased]

+## [0.13.3] - 2024-06-03
+- New 🛠️ `Langchain::Tool::Tavily` to execute search (better than the GoogleSearch tool)
+- Remove `activesupport` dependency
+- Misc fixes and improvements
+
+## [0.13.2] - 2024-05-20
+- New `Langchain::LLM::GoogleGemini#embed()` method
+- `Langchain::Assistant` works with `Langchain::LLM::Anthropic` llm
+- New XLS file processor
+- Fixes and improvements
+
 ## [0.13.1] - 2024-05-14
 - Better error handling for `Langchain::LLM::GoogleVertexAI`

data/README.md
CHANGED
@@ -60,12 +60,13 @@ Langchain.rb wraps supported LLMs in a unified interface allowing you to easily
 | [OpenAI](https://openai.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ❌ | Including Azure OpenAI |
 | [AI21](https://ai21.com/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ | ❌ | ✅ | |
 | [Anthropic](https://anthropic.com/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ | ✅ | ❌ | |
-| [
+| [AwsBedrock](https://aws.amazon.com/bedrock?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ❌ | Provides AWS, Cohere, AI21, Antropic and Stability AI models |
 | [Cohere](https://cohere.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |
 | [GooglePalm](https://ai.google/discover/palm2?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |
-| [
+| [GoogleVertexAI](https://cloud.google.com/vertex-ai?utm_source=langchainrb&utm_medium=github) | ✅ | ❌ | ✅ | ❌ | Requires Google Cloud service auth |
+| [GoogleGemini](https://cloud.google.com/vertex-ai?utm_source=langchainrb&utm_medium=github) | ✅ | ❌ | ✅ | ❌ | Requires Gemini API Key (Limited to US) |
 | [HuggingFace](https://huggingface.co/?utm_source=langchainrb&utm_medium=github) | ✅ | ❌ | ❌ | ❌ | |
-| [
+| [MistralAI](https://mistral.ai/?utm_source=langchainrb&utm_medium=github) | ✅ | ❌ | ✅ | ❌ | |
 | [Ollama](https://ollama.ai/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |
 | [Replicate](https://replicate.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |

@@ -413,12 +414,14 @@ Assistants are Agent-like objects that leverage helpful instructions, LLMs, tool
 | "ruby_code_interpreter" | Interprets Ruby expressions | | `gem "safe_ruby", "~> 1.0.4"` |
 | "google_search" | A wrapper around Google Search | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key) | `gem "google_search_results", "~> 2.0.0"` |
 | "news_retriever" | A wrapper around NewsApi.org | `ENV["NEWS_API_KEY"]` (https://newsapi.org/) | |
+| "tavily" | A wrapper around Tavily AI | `ENV["TAVILY_API_KEY"]` (https://tavily.com/) | |
 | "weather" | Calls Open Weather API to retrieve the current weather | `ENV["OPEN_WEATHER_API_KEY"]` (https://home.openweathermap.org/api_keys) | `gem "open-weather-ruby-client", "~> 0.3.0"` |
 | "wikipedia" | Calls Wikipedia API to retrieve the summary | | `gem "wikipedia-client", "~> 1.17.0"` |

 ### Demos
 1. [Building an AI Assistant that operates a simulated E-commerce Store](https://www.loom.com/share/83aa4fd8dccb492aad4ca95da40ed0b2)
 2. [New Langchain.rb Assistants interface](https://www.loom.com/share/e883a4a49b8746c1b0acf9d58cf6da36)
+3. [Langchain.rb Assistant demo with NewsRetriever and function calling on Gemini](https://youtu.be/-ieyahrpDpM&t=1477s) - [code](https://github.com/palladius/gemini-news-crawler)

 ### Creating an Assistant
 1. Instantiate an LLM of your choice
data/lib/langchain/assistants/assistant.rb
CHANGED
@@ -2,12 +2,24 @@

 module Langchain
   # Assistants are Agent-like objects that leverage helpful instructions, LLMs, tools and knowledge to respond to user queries.
-  # Assistants can be configured with an LLM of your choice
+  # Assistants can be configured with an LLM of your choice, any vector search database and easily extended with additional tools.
+  #
+  # Usage:
+  #     llm = Langchain::LLM::GoogleGemini.new(api_key: ENV["GOOGLE_GEMINI_API_KEY"])
+  #     assistant = Langchain::Assistant.new(
+  #       llm: llm,
+  #       instructions: "You're a News Reporter AI",
+  #       tools: [Langchain::Tool::NewsRetriever.new(api_key: ENV["NEWS_API_KEY"])]
+  #     )
   class Assistant
+    extend Forwardable
+    def_delegators :thread, :messages, :messages=
+
     attr_reader :llm, :thread, :instructions
     attr_accessor :tools

     SUPPORTED_LLMS = [
+      Langchain::LLM::Anthropic,
       Langchain::LLM::OpenAI,
       Langchain::LLM::GoogleGemini,
       Langchain::LLM::GoogleVertexAI
@@ -21,27 +33,28 @@ module Langchain
     # @param instructions [String] The system instructions to include in the thread
     def initialize(
       llm:,
-      thread
+      thread: nil,
       tools: [],
       instructions: nil
     )
       unless SUPPORTED_LLMS.include?(llm.class)
         raise ArgumentError, "Invalid LLM; currently only #{SUPPORTED_LLMS.join(", ")} are supported"
       end
-      raise ArgumentError, "Thread must be an instance of Langchain::Thread" unless thread.is_a?(Langchain::Thread)
       raise ArgumentError, "Tools must be an array of Langchain::Tool::Base instance(s)" unless tools.is_a?(Array) && tools.all? { |tool| tool.is_a?(Langchain::Tool::Base) }

       @llm = llm
-      @thread = thread
+      @thread = thread || Langchain::Thread.new
       @tools = tools
       @instructions = instructions

+      raise ArgumentError, "Thread must be an instance of Langchain::Thread" unless @thread.is_a?(Langchain::Thread)
+
       # The first message in the thread should be the system instructions
       # TODO: What if the user added old messages and the system instructions are already in there? Should this overwrite the existing instructions?
       if llm.is_a?(Langchain::LLM::OpenAI)
         add_message(role: "system", content: instructions) if instructions
       end
-      # For Google Gemini, system instructions are added to the `system:` param in the `chat` method
+      # For Google Gemini, and Anthropic system instructions are added to the `system:` param in the `chat` method
     end

     # Add a user message to the thread
@@ -137,6 +150,8 @@ module Langchain
         Langchain::Messages::OpenAIMessage::TOOL_ROLE
       elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
         Langchain::Messages::GoogleGeminiMessage::TOOL_ROLE
+      elsif llm.is_a?(Langchain::LLM::Anthropic)
+        Langchain::Messages::AnthropicMessage::TOOL_ROLE
       end

       # TODO: Validate that `tool_call_id` is valid by scanning messages and checking if this tool call ID was invoked
@@ -179,12 +194,17 @@ module Langchain
       if tools.any?
         if llm.is_a?(Langchain::LLM::OpenAI)
           params[:tools] = tools.map(&:to_openai_tools).flatten
+          params[:tool_choice] = "auto"
+        elsif llm.is_a?(Langchain::LLM::Anthropic)
+          params[:tools] = tools.map(&:to_anthropic_tools).flatten
+          params[:system] = instructions if instructions
+          params[:tool_choice] = {type: "auto"}
         elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
           params[:tools] = tools.map(&:to_google_gemini_tools).flatten
           params[:system] = instructions if instructions
+          params[:tool_choice] = "auto"
         end
         # TODO: Not sure that tool_choice should always be "auto"; Maybe we can let the user toggle it.
-        params[:tool_choice] = "auto"
       end

       llm.chat(**params)
@@ -200,6 +220,8 @@ module Langchain
           extract_openai_tool_call(tool_call: tool_call)
         elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
           extract_google_gemini_tool_call(tool_call: tool_call)
+        elsif llm.is_a?(Langchain::LLM::Anthropic)
+          extract_anthropic_tool_call(tool_call: tool_call)
         end

         tool_instance = tools.find do |t|
@@ -234,6 +256,20 @@ module Langchain
       [tool_call_id, tool_name, method_name, tool_arguments]
     end

+    # Extract the tool call information from the Anthropic tool call hash
+    #
+    # @param tool_call [Hash] The tool call hash, format: {"type"=>"tool_use", "id"=>"toolu_01TjusbFApEbwKPRWTRwzadR", "name"=>"news_retriever__get_top_headlines", "input"=>{"country"=>"us", "page_size"=>10}}], "stop_reason"=>"tool_use"}
+    # @return [Array] The tool call information
+    def extract_anthropic_tool_call(tool_call:)
+      tool_call_id = tool_call.dig("id")
+
+      function_name = tool_call.dig("name")
+      tool_name, method_name = function_name.split("__")
+      tool_arguments = tool_call.dig("input").transform_keys(&:to_sym)
+
+      [tool_call_id, tool_name, method_name, tool_arguments]
+    end
+
     # Extract the tool call information from the Google Gemini tool call hash
     #
     # @param tool_call [Hash] The tool call hash, format: {"functionCall"=>{"name"=>"weather__execute", "args"=>{"input"=>"NYC"}}}
@@ -260,6 +296,8 @@ module Langchain
         Langchain::Messages::OpenAIMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
       elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
         Langchain::Messages::GoogleGeminiMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
+      elsif llm.is_a?(Langchain::LLM::Anthropic)
+        Langchain::Messages::AnthropicMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
       end
     end

data/lib/langchain/assistants/messages/anthropic_message.rb
ADDED
@@ -0,0 +1,75 @@
+# frozen_string_literal: true
+
+module Langchain
+  module Messages
+    class AnthropicMessage < Base
+      ROLES = [
+        "assistant",
+        "user",
+        "tool_result"
+      ].freeze
+
+      TOOL_ROLE = "tool_result"
+
+      def initialize(role:, content: nil, tool_calls: [], tool_call_id: nil)
+        raise ArgumentError, "Role must be one of #{ROLES.join(", ")}" unless ROLES.include?(role)
+        raise ArgumentError, "Tool calls must be an array of hashes" unless tool_calls.is_a?(Array) && tool_calls.all? { |tool_call| tool_call.is_a?(Hash) }
+
+        @role = role
+        # Some Tools return content as a JSON hence `.to_s`
+        @content = content.to_s
+        @tool_calls = tool_calls
+        @tool_call_id = tool_call_id
+      end
+
+      # Convert the message to an Anthropic API-compatible hash
+      #
+      # @return [Hash] The message as an Anthropic API-compatible hash
+      def to_hash
+        {}.tap do |h|
+          h[:role] = tool? ? "user" : role
+
+          h[:content] = if tool?
+            [
+              {
+                type: "tool_result",
+                tool_use_id: tool_call_id,
+                content: content
+              }
+            ]
+          elsif tool_calls.any?
+            tool_calls
+          else
+            content
+          end
+        end
+      end
+
+      # Check if the message is a tool call
+      #
+      # @return [Boolean] true/false whether this message is a tool call
+      def tool?
+        role == "tool_result"
+      end
+
+      # Anthropic does not implement system prompts
+      def system?
+        false
+      end
+
+      # Check if the message came from an LLM
+      #
+      # @return [Boolean] true/false whether this message was produced by an LLM
+      def assistant?
+        role == "assistant"
+      end
+
+      # Check if the message came from an LLM
+      #
+      # @return [Boolean] true/false whether this message was produced by an LLM
+      def llm?
+        assistant?
+      end
+    end
+  end
+end
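Taken together, the Assistant and AnthropicMessage changes mean an Assistant can now be driven by Claude. A minimal sketch, modeled on the usage comment added to assistant.rb above; the ANTHROPIC_API_KEY / NEWS_API_KEY environment variables and the NewsRetriever tool choice are illustrative, not part of this diff:

require "langchain"

llm = Langchain::LLM::Anthropic.new(api_key: ENV["ANTHROPIC_API_KEY"])

assistant = Langchain::Assistant.new(
  llm: llm,                                   # Anthropic is now in SUPPORTED_LLMS
  instructions: "You're a News Reporter AI",  # sent as the `system:` param at chat time
  tools: [Langchain::Tool::NewsRetriever.new(api_key: ENV["NEWS_API_KEY"])]
)

assistant.add_message(content: "What are today's top headlines?")
assistant.run(auto_tool_execution: true)
assistant.messages # delegated to the underlying thread via def_delegators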
data/lib/langchain/llm/anthropic.rb
CHANGED
@@ -101,6 +101,8 @@ module Langchain::LLM
     # @option params [Float] :top_p Use nucleus sampling.
     # @return [Langchain::LLM::AnthropicResponse] The chat completion
     def chat(params = {})
+      set_extra_headers! if params[:tools]
+
       parameters = chat_parameters.to_params(params)

       raise ArgumentError.new("messages argument is required") if Array(parameters[:messages]).empty?
@@ -111,5 +113,11 @@ module Langchain::LLM

       Langchain::LLM::AnthropicResponse.new(response)
     end
+
+    private
+
+    def set_extra_headers!
+      ::Anthropic.configuration.extra_headers = {"anthropic-beta": "tools-2024-05-16"}
+    end
   end
 end
data/lib/langchain/llm/aws_bedrock.rb
CHANGED
@@ -135,22 +135,43 @@ module Langchain::LLM
     # @option params [Float] :temperature The temperature to use for completion
     # @option params [Float] :top_p Use nucleus sampling.
     # @option params [Integer] :top_k Only sample from the top K options for each subsequent token
-    # @
-
+    # @yield [Hash] Provides chunks of the response as they are received
+    # @return [Langchain::LLM::AnthropicResponse] Response object
+    def chat(params = {}, &block)
       parameters = chat_parameters.to_params(params)

       raise ArgumentError.new("messages argument is required") if Array(parameters[:messages]).empty?

       raise "Model #{parameters[:model]} does not support chat completions." unless Langchain::LLM::AwsBedrock::SUPPORTED_CHAT_COMPLETION_PROVIDERS.include?(completion_provider)

-
-
-        body: parameters.except(:model).to_json,
-        content_type: "application/json",
-        accept: "application/json"
-      })
+      if block
+        response_chunks = []

-
+        client.invoke_model_with_response_stream(
+          model_id: parameters[:model],
+          body: parameters.except(:model).to_json,
+          content_type: "application/json",
+          accept: "application/json"
+        ) do |stream|
+          stream.on_event do |event|
+            chunk = JSON.parse(event.bytes)
+            response_chunks << chunk
+
+            yield chunk
+          end
+        end
+
+        response_from_chunks(response_chunks)
+      else
+        response = client.invoke_model({
+          model_id: parameters[:model],
+          body: parameters.except(:model).to_json,
+          content_type: "application/json",
+          accept: "application/json"
+        })
+
+        parse_response response
+      end
     end

     private
@@ -260,5 +281,37 @@ module Langchain::LLM
        }
      }
    end
+
+    def response_from_chunks(chunks)
+      raw_response = {}
+
+      chunks.group_by { |chunk| chunk["type"] }.each do |type, chunks|
+        case type
+        when "message_start"
+          raw_response = chunks.first["message"]
+        when "content_block_start"
+          raw_response["content"] = chunks.map { |chunk| chunk["content_block"] }
+        when "content_block_delta"
+          chunks.group_by { |chunk| chunk["index"] }.each do |index, deltas|
+            deltas.group_by { |delta| delta.dig("delta", "type") }.each do |type, deltas|
+              case type
+              when "text_delta"
+                raw_response["content"][index]["text"] = deltas.map { |delta| delta.dig("delta", "text") }.join
+              when "input_json_delta"
+                json_string = deltas.map { |delta| delta.dig("delta", "partial_json") }.join
+                raw_response["content"][index]["input"] = json_string.empty? ? {} : JSON.parse(json_string)
+              end
+            end
+          end
+        when "message_delta"
+          chunks.each do |chunk|
+            raw_response = raw_response.merge(chunk["delta"])
+            raw_response["usage"] = raw_response["usage"].merge(chunk["usage"]) if chunk["usage"]
+          end
+        end
+      end
+
+      Langchain::LLM::AnthropicResponse.new(raw_response)
+    end
   end
 end
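The reworked chat method streams when a block is given and falls back to a single invoke_model call otherwise. A hedged sketch of the streaming path, assuming AWS region and credentials are already configured for the underlying Bedrock runtime client and an Anthropic model is the default:

llm = Langchain::LLM::AwsBedrock.new

final = llm.chat(messages: [{role: "user", content: "Tell me a short joke"}]) do |chunk|
  # Each chunk is one parsed stream event (message_start, content_block_delta, message_delta, ...)
  print chunk.dig("delta", "text") if chunk["type"] == "content_block_delta"
end

final.chat_completion # assembled from the buffered chunks by response_from_chunks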
data/lib/langchain/llm/google_gemini.rb
CHANGED
@@ -6,6 +6,7 @@ module Langchain::LLM
   class GoogleGemini < Base
     DEFAULTS = {
       chat_completion_model_name: "gemini-1.5-pro-latest",
+      embeddings_model_name: "text-embedding-004",
       temperature: 0.0
     }

@@ -63,5 +64,35 @@ module Langchain::LLM
         raise StandardError.new(response)
       end
     end
+
+    def embed(
+      text:,
+      model: @defaults[:embeddings_model_name]
+    )
+
+      params = {
+        content: {
+          parts: [
+            {
+              text: text
+            }
+          ]
+        }
+      }
+
+      uri = URI("https://generativelanguage.googleapis.com/v1beta/models/#{model}:embedContent?key=#{api_key}")
+
+      request = Net::HTTP::Post.new(uri)
+      request.content_type = "application/json"
+      request.body = params.to_json
+
+      response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http|
+        http.request(request)
+      end
+
+      parsed_response = JSON.parse(response.body)
+
+      Langchain::LLM::GoogleGeminiResponse.new(parsed_response, model: model)
+    end
   end
 end
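With the new embeddings_model_name default and embed method, Gemini embeddings are one call away. A minimal sketch; the GOOGLE_GEMINI_API_KEY environment variable is illustrative:

llm = Langchain::LLM::GoogleGemini.new(api_key: ENV["GOOGLE_GEMINI_API_KEY"])

response = llm.embed(text: "Ruby is a programmer's best friend")
response.embedding # => Array of floats from text-embedding-004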
data/lib/langchain/llm/google_vertex_ai.rb
CHANGED
@@ -28,7 +28,10 @@ module Langchain::LLM
     def initialize(project_id:, region:, default_options: {})
       depends_on "googleauth"

-      @authorizer = ::Google::Auth.get_application_default
+      @authorizer = ::Google::Auth.get_application_default(scope: [
+        "https://www.googleapis.com/auth/cloud-platform",
+        "https://www.googleapis.com/auth/generative-language.retriever"
+      ])
       proj_id = project_id || @authorizer.project_id || @authorizer.quota_project_id
       @url = "https://#{region}-aiplatform.googleapis.com/v1/projects/#{proj_id}/locations/#{region}/publishers/google/models/"

data/lib/langchain/llm/hugging_face.rb
CHANGED
@@ -11,12 +11,12 @@ module Langchain::LLM
   # hf = Langchain::LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
   #
   class HuggingFace < Base
-    # The gem does not currently accept other models:
-    # https://github.com/alchaplinsky/hugging-face/blob/main/lib/hugging_face/inference_api.rb#L32-L34
     DEFAULTS = {
-
-
-
+      embeddings_model_name: "sentence-transformers/all-MiniLM-L6-v2"
+    }.freeze
+
+    EMBEDDING_SIZES = {
+      "sentence-transformers/all-MiniLM-L6-v2": 384
     }.freeze

     #
@@ -24,10 +24,21 @@ module Langchain::LLM
     #
     # @param api_key [String] The API key to use
     #
-    def initialize(api_key:)
+    def initialize(api_key:, default_options: {})
       depends_on "hugging-face", req: "hugging_face"

       @client = ::HuggingFace::InferenceApi.new(api_token: api_key)
+      @defaults = DEFAULTS.merge(default_options)
+    end
+
+    # Returns the # of vector dimensions for the embeddings
+    # @return [Integer] The # of vector dimensions
+    def default_dimensions
+      # since Huggin Face can run multiple models, look it up or generate an embedding and return the size
+      @default_dimensions ||= @defaults[:dimensions] ||
+        EMBEDDING_SIZES.fetch(@defaults[:embeddings_model_name].to_sym) do
+          embed(text: "test").embedding.size
+        end
     end

     #
@@ -39,9 +50,9 @@ module Langchain::LLM
     def embed(text:)
       response = client.embedding(
         input: text,
-        model:
+        model: @defaults[:embeddings_model_name]
       )
-      Langchain::LLM::HuggingFaceResponse.new(response, model:
+      Langchain::LLM::HuggingFaceResponse.new(response, model: @defaults[:embeddings_model_name])
     end
   end
 end
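The constructor now accepts default_options, so the embedding model is no longer hard-coded, and default_dimensions either looks the size up in EMBEDDING_SIZES or infers it by embedding a test string. A sketch; the alternative model name is illustrative:

hf = Langchain::LLM::HuggingFace.new(
  api_key: ENV["HUGGING_FACE_API_KEY"],
  default_options: {embeddings_model_name: "sentence-transformers/all-mpnet-base-v2"}
)

hf.default_dimensions # not in EMBEDDING_SIZES, so it embeds "test" once and caches the size
hf.embed(text: "Ruby is a programmer's best friend").embedding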
data/lib/langchain/llm/ollama.rb
CHANGED
@@ -65,8 +65,14 @@ module Langchain::LLM
     # @param model [String] The model to use
     #   For a list of valid parameters and values, see:
     #   https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values
+    # @option block [Proc] Receive the intermediate responses as a stream of +OllamaResponse+ objects.
     # @return [Langchain::LLM::OllamaResponse] Response object
     #
+    # Example:
+    #
+    #  final_resp = ollama.complete(prompt:) { |resp| print resp.completion }
+    #  final_resp.total_tokens
+    #
     def complete(
       prompt:,
       model: defaults[:completion_model_name],
@@ -75,7 +81,6 @@ module Langchain::LLM
       system: nil,
       template: nil,
       context: nil,
-      stream: nil,
       raw: nil,
       mirostat: nil,
       mirostat_eta: nil,
@@ -108,7 +113,7 @@ module Langchain::LLM
         system: system,
         template: template,
         context: context,
-        stream:
+        stream: block.present?,
         raw: raw
       }.compact

@@ -132,53 +137,54 @@ module Langchain::LLM
       }

       parameters[:options] = llm_parameters.compact
+      responses_stream = []

-
-
-
-        req.body = parameters
+      client.post("api/generate", parameters) do |req|
+        req.options.on_data = json_responses_chunk_handler do |parsed_chunk|
+          responses_stream << parsed_chunk

-
-          chunk.split("\n").each do |line_chunk|
-            json_chunk = begin
-              JSON.parse(line_chunk)
-            # In some instance the chunk exceeds the buffer size and the JSON parser fails
-            rescue JSON::ParserError
-              nil
-            end
-
-            response += json_chunk.dig("response") unless json_chunk.blank?
-          end
-
-          yield json_chunk, size if block
+          block&.call(OllamaResponse.new(parsed_chunk, model: parameters[:model]))
         end
       end

-
+      generate_final_completion_response(responses_stream, parameters)
     end

     # Generate a chat completion
     #
-    # @param [
-    # @
+    # @param messages [Array] The chat messages
+    # @param model [String] The model to use
+    # @param params [Hash] Unified chat parmeters from [Langchain::LLM::Parameters::Chat::SCHEMA]
     # @option params [Array<Hash>] :messages Array of messages
+    # @option params [String] :model Model name
     # @option params [String] :format Format to return a response in. Currently the only accepted value is `json`
     # @option params [Float] :temperature The temperature to use
     # @option params [String] :template The prompt template to use (overrides what is defined in the `Modelfile`)
-    # @option
+    # @option block [Proc] Receive the intermediate responses as a stream of +OllamaResponse+ objects.
+    # @return [Langchain::LLM::OllamaResponse] Response object
+    #
+    # Example:
+    #
+    #  final_resp = ollama.chat(messages:) { |resp| print resp.chat_completion }
+    #  final_resp.total_tokens
     #
     # The message object has the following fields:
     #   role: the role of the message, either system, user or assistant
     #   content: the content of the message
     #   images (optional): a list of images to include in the message (for multimodal models such as llava)
-    def chat(params
-      parameters = chat_parameters.to_params(params)
+    def chat(messages:, model: nil, **params, &block)
+      parameters = chat_parameters.to_params(params.merge(messages:, model:, stream: block.present?))
+      responses_stream = []

-
-      req.
+      client.post("api/chat", parameters) do |req|
+        req.options.on_data = json_responses_chunk_handler do |parsed_chunk|
+          responses_stream << parsed_chunk
+
+          block&.call(OllamaResponse.new(parsed_chunk, model: parameters[:model]))
+        end
       end

-
+      generate_final_chat_completion_response(responses_stream, parameters)
     end

     #
@@ -239,7 +245,7 @@ module Langchain::LLM
         req.body = parameters
       end

-
+      OllamaResponse.new(response.body, model: parameters[:model])
     end

     # Generate a summary for a given text
@@ -257,7 +263,6 @@ module Langchain::LLM

     private

-    # @return [Faraday::Connection] Faraday client
     def client
       @client ||= Faraday.new(url: url) do |conn|
         conn.request :json
@@ -265,5 +270,33 @@ module Langchain::LLM
         conn.response :raise_error
       end
     end
+
+    def json_responses_chunk_handler(&block)
+      proc do |chunk, _size|
+        chunk.split("\n").each do |chunk_line|
+          parsed_chunk = JSON.parse(chunk_line)
+          block.call(parsed_chunk)
+        end
+      end
+    end
+
+    def generate_final_completion_response(responses_stream, parameters)
+      final_response = responses_stream.last.merge(
+        "response" => responses_stream.map { |resp| resp["response"] }.join
+      )
+
+      OllamaResponse.new(final_response, model: parameters[:model])
+    end
+
+    def generate_final_chat_completion_response(responses_stream, parameters)
+      final_response = responses_stream.last.merge(
+        "message" => {
+          "role" => "assistant",
+          "content" => responses_stream.map { |resp| resp.dig("message", "content") }.join
+        }
+      )
+
+      OllamaResponse.new(final_response, model: parameters[:model])
+    end
   end
 end
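Both complete and chat now derive the stream flag from the presence of a block, yield each parsed chunk wrapped in an OllamaResponse, and merge the chunks into the final response. A sketch of streaming chat, assuming a local Ollama server with the default chat model available:

ollama = Langchain::LLM::Ollama.new(url: "http://localhost:11434")

final_resp = ollama.chat(messages: [{role: "user", content: "Hey! How are you?"}]) do |resp|
  print resp.chat_completion # each intermediate chunk arrives as an OllamaResponse
end

final_resp.total_tokens # token counts are only populated on the final ("done") chunk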
data/lib/langchain/llm/openai.rb
CHANGED
@@ -26,8 +26,6 @@ module Langchain::LLM
       "text-embedding-3-small" => 1536
     }.freeze

-    LENGTH_VALIDATOR = Langchain::Utils::TokenLength::OpenAIValidator
-
     attr_reader :defaults

     # Initialize an OpenAI LLM instance
@@ -82,8 +80,6 @@ module Langchain::LLM
         parameters[:dimensions] = EMBEDDING_SIZES[model]
       end

-      validate_max_tokens(text, parameters[:model])
-
       response = with_api_error_handling do
         client.embeddings(parameters: parameters)
       end
@@ -177,10 +173,6 @@ module Langchain::LLM
       response
     end

-    def validate_max_tokens(messages, model, max_tokens = nil)
-      LENGTH_VALIDATOR.validate_max_tokens!(messages, model, max_tokens: max_tokens, llm: self)
-    end
-
     def response_from_chunks
       grouped_chunks = @response_chunks.group_by { |chunk| chunk.dig("choices", 0, "index") }
       final_choices = grouped_chunks.map do |index, chunks|
@@ -188,12 +180,31 @@ module Langchain::LLM
           "index" => index,
           "message" => {
             "role" => "assistant",
-            "content" => chunks.map { |chunk| chunk.dig("choices", 0, "delta", "content") }.join
-
+            "content" => chunks.map { |chunk| chunk.dig("choices", 0, "delta", "content") }.join,
+            "tool_calls" => tool_calls_from_choice_chunks(chunks)
+          }.compact,
           "finish_reason" => chunks.last.dig("choices", 0, "finish_reason")
         }
       end
       @response_chunks.first&.slice("id", "object", "created", "model")&.merge({"choices" => final_choices})
     end
+
+    def tool_calls_from_choice_chunks(choice_chunks)
+      tool_call_chunks = choice_chunks.select { |chunk| chunk.dig("choices", 0, "delta", "tool_calls") }
+      return nil if tool_call_chunks.empty?
+
+      tool_call_chunks.group_by { |chunk| chunk.dig("choices", 0, "delta", "tool_calls", 0, "index") }.map do |index, chunks|
+        first_chunk = chunks.first
+
+        {
+          "id" => first_chunk.dig("choices", 0, "delta", "tool_calls", 0, "id"),
+          "type" => first_chunk.dig("choices", 0, "delta", "tool_calls", 0, "type"),
+          "function" => {
+            "name" => first_chunk.dig("choices", 0, "delta", "tool_calls", 0, "function", "name"),
+            "arguments" => chunks.map { |chunk| chunk.dig("choices", 0, "delta", "tool_calls", 0, "function", "arguments") }.join
+          }
+        }
+      end
+    end
   end
 end
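tool_calls_from_choice_chunks stitches a streamed function call back together: the id, type and name come from the first delta for a given index and the argument fragments are concatenated. Illustrative chunk data, shaped like OpenAI chat.completion.chunk payloads rather than taken from this diff:

chunks = [
  {"choices" => [{"delta" => {"tool_calls" => [{"index" => 0, "id" => "call_123", "type" => "function",
    "function" => {"name" => "weather__execute", "arguments" => "{\"input\":"}}]}}]},
  {"choices" => [{"delta" => {"tool_calls" => [{"index" => 0,
    "function" => {"arguments" => "\"NYC\"}"}}]}}]}
]
# tool_calls_from_choice_chunks(chunks)
# => [{"id" => "call_123", "type" => "function",
#      "function" => {"name" => "weather__execute", "arguments" => "{\"input\":\"NYC\"}"}}]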
data/lib/langchain/llm/response/anthropic_response.rb
CHANGED
@@ -11,7 +11,17 @@ module Langchain::LLM
     end

     def chat_completion
-
+      chat_completion = chat_completions.find { |h| h["type"] == "text" }
+      chat_completion&.dig("text")
+    end
+
+    def tool_calls
+      tool_call = chat_completions.find { |h| h["type"] == "tool_use" }
+      tool_call ? [tool_call] : []
+    end
+
+    def chat_completions
+      raw_response.dig("content")
     end

     def completions
data/lib/langchain/llm/response/google_gemini_response.rb
CHANGED
@@ -27,7 +27,11 @@ module Langchain::LLM
     end

     def embeddings
-
+      if raw_response.key?("embedding")
+        [raw_response.dig("embedding", "values")]
+      else
+        [raw_response.dig("predictions", 0, "embeddings", "values")]
+      end
     end

     def prompt_tokens
data/lib/langchain/llm/response/ollama_response.rb
CHANGED
@@ -8,9 +8,7 @@ module Langchain::LLM
     end

     def created_at
-      if raw_response.dig("created_at")
-        Time.parse(raw_response.dig("created_at"))
-      end
+      Time.parse(raw_response.dig("created_at")) if raw_response.dig("created_at")
     end

     def chat_completion
@@ -18,11 +16,11 @@ module Langchain::LLM
     end

     def completion
-
+      raw_response.dig("response")
     end

     def completions
-
+      [completion].compact
     end

     def embedding
@@ -38,15 +36,21 @@ module Langchain::LLM
     end

     def prompt_tokens
-      raw_response.dig("prompt_eval_count")
+      raw_response.dig("prompt_eval_count") if done?
     end

     def completion_tokens
-      raw_response.dig("eval_count")
+      raw_response.dig("eval_count") if done?
     end

     def total_tokens
-      prompt_tokens + completion_tokens
+      prompt_tokens + completion_tokens if done?
+    end
+
+    private
+
+    def done?
+      !!raw_response["done"]
     end
   end
 end
data/lib/langchain/processors/xls.rb
ADDED
@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+
+module Langchain
+  module Processors
+    class Xls < Base
+      EXTENSIONS = [".xls"].freeze
+      CONTENT_TYPES = ["application/vnd.ms-excel"].freeze
+
+      def initialize(*)
+        depends_on "roo"
+        depends_on "roo-xls"
+      end
+
+      # Parse the document and return the text
+      # @param [File] data
+      # @return [Array<Array<String>>] Array of rows, each row is an array of cells
+      def parse(data)
+        xls_file = Roo::Spreadsheet.open(data)
+        xls_file.each_with_pagename.flat_map do |_, sheet|
+          sheet.map do |row|
+            row.map { |i| i.to_s.strip }
+          end
+        end
+      end
+    end
+  end
+end
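The new processor plugs into the existing file-loading pipeline by extension, next to the xlsx processor, and requires roo and roo-xls in the Gemfile. A hedged sketch under those assumptions; the sample path and output are illustrative:

data = Langchain::Loader.new("reports/quarterly.xls").load
data.value # => rows of stringified cells, e.g. [["Region", "Revenue"], ["EMEA", "1200"]]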
data/lib/langchain/tool/base.rb
CHANGED
@@ -71,6 +71,18 @@ module Langchain::Tool
       method_annotations
     end

+    # Returns the tool as a list of Anthropic formatted functions
+    #
+    # @return [Array<Hash>] List of hashes representing the tool as Anthropic formatted functions
+    def to_anthropic_tools
+      method_annotations.map do |annotation|
+        # Slice out only the content of the "function" key
+        annotation["function"]
+          # Rename "parameters" to "input_schema" key
+          .transform_keys("parameters" => "input_schema")
+      end
+    end
+
     # Returns the tool as a list of Google Gemini formatted functions
     #
     # @return [Array<Hash>] List of hashes representing the tool as Google Gemini formatted functions
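The Anthropic format drops the outer {type: "function", function: {...}} wrapper used by OpenAI and renames parameters to input_schema. Illustratively, for the Tavily annotations added below:

Langchain::Tool::Tavily.new(api_key: ENV["TAVILY_API_KEY"]).to_anthropic_tools
# => [{"name" => "tavily__search",
#      "description" => "Tavily Tool: Robust search API",
#      "input_schema" => {"type" => "object", "properties" => {...}, "required" => ["query"]}}]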
data/lib/langchain/tool/news_retriever/news_retriever.json
CHANGED
@@ -68,7 +68,8 @@
       "properties": {
         "country": {
           "type": "string",
-          "description": "The 2-letter ISO 3166-1 code of the country you want to get headlines for."
+          "description": "The 2-letter ISO 3166-1 code of the country you want to get headlines for.",
+          "enum": ["ae", "ar", "at", "au", "be", "bg", "br", "ca", "ch", "cn", "co", "cu", "cz", "de", "eg", "fr", "gb", "gr", "hk", "hu", "id", "ie", "il", "in", "it", "jp", "kr", "lt", "lv", "ma", "mx", "my", "ng", "nl", "no", "nz", "ph", "pl", "pt", "ro", "rs", "ru", "sa", "se", "sg", "si", "sk", "th", "tr", "tw", "ua", "us", "ve", "za"]
         },
         "category": {
           "type": "string",
data/lib/langchain/tool/tavily/tavily.json
ADDED
@@ -0,0 +1,54 @@
+[
+  {
+    "type": "function",
+    "function": {
+      "name": "tavily__search",
+      "description": "Tavily Tool: Robust search API",
+      "parameters": {
+        "type": "object",
+        "properties": {
+          "query": {
+            "type": "string",
+            "description": "The search query string"
+          },
+          "search_depth": {
+            "type": "string",
+            "description": "The depth of the search: basic for quick results and advanced for indepth high quality results but longer response time",
+            "enum": ["basic", "advanced"]
+          },
+          "include_images": {
+            "type": "boolean",
+            "description": "Include a list of query related images in the response"
+          },
+          "include_answer": {
+            "type": "boolean",
+            "description": "Include answers in the search results"
+          },
+          "include_raw_content": {
+            "type": "boolean",
+            "description": "Include raw content in the search results"
+          },
+          "max_results": {
+            "type": "integer",
+            "description": "The number of maximum search results to return"
+          },
+          "include_domains": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "A list of domains to specifically include in the search results"
+          },
+          "exclude_domains": {
+            "type": "array",
+            "items": {
+              "type": "string"
+            },
+            "description": "A list of domains to specifically exclude from the search results"
+          }
+        },
+        "required": ["query"]
+      }
+    }
+  }
+]
data/lib/langchain/tool/tavily/tavily.rb
ADDED
@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+
+module Langchain::Tool
+  class Tavily < Base
+    #
+    # Tavily Search is a robust search API tailored specifically for LLM Agents.
+    # It seamlessly integrates with diverse data sources to ensure a superior, relevant search experience.
+    #
+    # Usage:
+    #    tavily = Langchain::Tool::Tavily.new(api_key: ENV["TAVILY_API_KEY"])
+    #
+    NAME = "tavily"
+    ANNOTATIONS_PATH = Langchain.root.join("./langchain/tool/#{NAME}/#{NAME}.json").to_path
+
+    def initialize(api_key:)
+      @api_key = api_key
+    end
+
+    # Search for data based on a query.
+    #
+    # @param query [String] The search query string.
+    # @param search_depth [String] The depth of the search. It can be basic or advanced. Default is basic for quick results and advanced for indepth high quality results but longer response time. Advanced calls equals 2 requests.
+    # @param include_images [Boolean] Include a list of query related images in the response. Default is False.
+    # @param include_answer [Boolean] Include answers in the search results. Default is False.
+    # @param include_raw_content [Boolean] Include raw content in the search results. Default is False.
+    # @param max_results [Integer] The number of maximum search results to return. Default is 5.
+    # @param include_domains [Array<String>] A list of domains to specifically include in the search results. Default is None, which includes all domains.
+    # @param exclude_domains [Array<String>] A list of domains to specifically exclude from the search results. Default is None, which doesn't exclude any domains.
+    #
+    # @return [String] The search results in JSON format.
+    def search(
+      query:,
+      search_depth: "basic",
+      include_images: false,
+      include_answer: false,
+      include_raw_content: false,
+      max_results: 5,
+      include_domains: [],
+      exclude_domains: []
+    )
+      uri = URI("https://api.tavily.com/search")
+      request = Net::HTTP::Post.new(uri)
+      request.content_type = "application/json"
+      request.body = {
+        api_key: @api_key,
+        query: query,
+        search_depth: search_depth,
+        include_images: include_images,
+        include_answer: include_answer,
+        include_raw_content: include_raw_content,
+        max_results: max_results,
+        include_domains: include_domains,
+        exclude_domains: exclude_domains
+      }.to_json
+
+      response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http|
+        http.request(request)
+      end
+      response.body
+    end
+  end
+end
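Usage of the new tool, standalone or attached to an Assistant; the OpenAI LLM choice and the environment variables are illustrative:

tavily = Langchain::Tool::Tavily.new(api_key: ENV["TAVILY_API_KEY"])

tavily.search(query: "Who is the current CEO of Shopify?", max_results: 3) # JSON string from Tavily

assistant = Langchain::Assistant.new(
  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]),
  instructions: "You are a research assistant",
  tools: [tavily]
)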
data/lib/langchain/version.rb
CHANGED
data/lib/langchain.rb
CHANGED
@@ -34,6 +34,7 @@ loader.collapse("#{__dir__}/langchain/tool/file_system")
 loader.collapse("#{__dir__}/langchain/tool/google_search")
 loader.collapse("#{__dir__}/langchain/tool/ruby_code_interpreter")
 loader.collapse("#{__dir__}/langchain/tool/news_retriever")
+loader.collapse("#{__dir__}/langchain/tool/tavily")
 loader.collapse("#{__dir__}/langchain/tool/vectorsearch")
 loader.collapse("#{__dir__}/langchain/tool/weather")
 loader.collapse("#{__dir__}/langchain/tool/wikipedia")
metadata
CHANGED
@@ -1,29 +1,15 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.13.
+  version: 0.13.3
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-
+date: 2024-06-03 00:00:00.000000000 Z
 dependencies:
-- !ruby/object:Gem::Dependency
-  name: activesupport
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 7.0.8
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - ">="
-      - !ruby/object:Gem::Version
-        version: 7.0.8
 - !ruby/object:Gem::Dependency
   name: baran
   requirement: !ruby/object:Gem::Requirement
@@ -52,20 +38,6 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 1.1.0
-- !ruby/object:Gem::Dependency
-  name: tiktoken_ruby
-  requirement: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: 0.0.8
-  type: :runtime
-  prerelease: false
-  version_requirements: !ruby/object:Gem::Requirement
-    requirements:
-    - - "~>"
-      - !ruby/object:Gem::Version
-        version: 0.0.8
 - !ruby/object:Gem::Dependency
   name: json-schema
   requirement: !ruby/object:Gem::Requirement
@@ -346,6 +318,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 1.6.5
+- !ruby/object:Gem::Dependency
+  name: faraday
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: googleauth
   requirement: !ruby/object:Gem::Requirement
@@ -598,6 +584,20 @@ dependencies:
     - - "~>"
      - !ruby/object:Gem::Version
        version: 2.10.0
+- !ruby/object:Gem::Dependency
+  name: roo-xls
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 1.2.0
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 1.2.0
 - !ruby/object:Gem::Dependency
   name: ruby-openai
   requirement: !ruby/object:Gem::Requirement
@@ -669,33 +669,33 @@ dependencies:
       - !ruby/object:Gem::Version
         version: 1.17.0
 - !ruby/object:Gem::Dependency
-  name:
+  name: power_point_pptx
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - "~>"
       - !ruby/object:Gem::Version
-        version:
+        version: 0.1.0
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - "~>"
      - !ruby/object:Gem::Version
-        version:
+        version: 0.1.0
 - !ruby/object:Gem::Dependency
-  name:
+  name: tiktoken_ruby
   requirement: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.
+        version: 0.0.9
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.
+        version: 0.0.9
 description: Build LLM-backed Ruby applications with Ruby's Langchain.rb
 email:
 - andrei.bondarev13@gmail.com
@@ -708,6 +708,7 @@ files:
 - README.md
 - lib/langchain.rb
 - lib/langchain/assistants/assistant.rb
+- lib/langchain/assistants/messages/anthropic_message.rb
 - lib/langchain/assistants/messages/base.rb
 - lib/langchain/assistants/messages/google_gemini_message.rb
 - lib/langchain/assistants/messages/openai_message.rb
@@ -779,6 +780,7 @@ files:
 - lib/langchain/processors/pdf.rb
 - lib/langchain/processors/pptx.rb
 - lib/langchain/processors/text.rb
+- lib/langchain/processors/xls.rb
 - lib/langchain/processors/xlsx.rb
 - lib/langchain/prompt.rb
 - lib/langchain/prompt/base.rb
@@ -798,6 +800,8 @@ files:
 - lib/langchain/tool/news_retriever/news_retriever.rb
 - lib/langchain/tool/ruby_code_interpreter/ruby_code_interpreter.json
 - lib/langchain/tool/ruby_code_interpreter/ruby_code_interpreter.rb
+- lib/langchain/tool/tavily/tavily.json
+- lib/langchain/tool/tavily/tavily.rb
 - lib/langchain/tool/vectorsearch/vectorsearch.json
 - lib/langchain/tool/vectorsearch/vectorsearch.rb
 - lib/langchain/tool/weather/weather.json
@@ -848,7 +852,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
   - !ruby/object:Gem::Version
     version: '0'
 requirements: []
-rubygems_version: 3.5.
+rubygems_version: 3.5.11
 signing_key:
 specification_version: 4
 summary: Build LLM-backed Ruby applications with Ruby's Langchain.rb