langchainrb 0.13.0 → 0.13.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b146eb8568d30ae12aca93a25818fcff7421b7ee2e968330f3a68c5e523da148
4
- data.tar.gz: 33f88d7ba03501606706314dce58f626fa0df5aab50639b5f5db3df527ee6520
3
+ metadata.gz: 30a8890d61255a8d937f2e2996f2b32de8ff432697a4a946afb08e853d383e2a
4
+ data.tar.gz: ee561cd2fac6b60308c07bfa300f09b6d1f8092ca669f2712f81a5b36814dd31
5
5
  SHA512:
6
- metadata.gz: 6518e30de12653426280f6f8cf05f37a6d4b311ad4219af52276bace8a75ec6440b8f42c208d9d5c07bb4218f3259cc95ea9edd77f8ba037e7a1600a7dfa3170
7
- data.tar.gz: 7f881a4347866c8b52161adaf6b98b669e38b4e2fd1ac513f02efd1dcfe73b2552ad8e65acdd02e9e002769ad3b394850f720e0971774632c2f489c25d9ce076
6
+ metadata.gz: 3ab2bf6b1e57754497165b931e366d9c46051d829d3c05f23f73d3f20017eb91c86bf586cc318e7329e7990ae69002b0ad16638f5f41b3b0d141560a56c7236a
7
+ data.tar.gz: e621507bece6ff42ee80788bb9fb486dbd685b33d0f3893a9affca3d2edaf2a55441e98278bb056b290dcb2102d12a6eac5faea1c29d36a6e6f61118c43ee121
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.13.2] - 2024-05-20
4
+ - New `Langchain::LLM::GoogleGemini#embed()` method
5
+ - `Langchain::Assistant` works with `Langchain::LLM::Anthropic` llm
6
+ - New XLS file processor
7
+ - Fixes and improvements
8
+
9
+ ## [0.13.1] - 2024-05-14
10
+ - Better error handling for `Langchain::LLM::GoogleVertexAI`
11
+
3
12
  ## [0.13.0] - 2024-05-14
4
13
  - New 🛠️ `Langchain::Tool::NewsRetriever` tool to fetch news via newsapi.org
5
14
  - Langchain::Assistant works with `Langchain::LLM::GoogleVertexAI` and `Langchain::LLM::GoogleGemini` llms
@@ -8,6 +8,7 @@ module Langchain
8
8
  attr_accessor :tools
9
9
 
10
10
  SUPPORTED_LLMS = [
11
+ Langchain::LLM::Anthropic,
11
12
  Langchain::LLM::OpenAI,
12
13
  Langchain::LLM::GoogleGemini,
13
14
  Langchain::LLM::GoogleVertexAI
@@ -41,7 +42,7 @@ module Langchain
41
42
  if llm.is_a?(Langchain::LLM::OpenAI)
42
43
  add_message(role: "system", content: instructions) if instructions
43
44
  end
44
- # For Google Gemini, system instructions are added to the `system:` param in the `chat` method
45
+ # For Google Gemini and Anthropic, system instructions are added to the `system:` param in the `chat` method
45
46
  end
46
47
 
47
48
  # Add a user message to the thread
@@ -137,6 +138,8 @@ module Langchain
137
138
  Langchain::Messages::OpenAIMessage::TOOL_ROLE
138
139
  elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
139
140
  Langchain::Messages::GoogleGeminiMessage::TOOL_ROLE
141
+ elsif llm.is_a?(Langchain::LLM::Anthropic)
142
+ Langchain::Messages::AnthropicMessage::TOOL_ROLE
140
143
  end
141
144
 
142
145
  # TODO: Validate that `tool_call_id` is valid by scanning messages and checking if this tool call ID was invoked
@@ -179,12 +182,17 @@ module Langchain
179
182
  if tools.any?
180
183
  if llm.is_a?(Langchain::LLM::OpenAI)
181
184
  params[:tools] = tools.map(&:to_openai_tools).flatten
185
+ params[:tool_choice] = "auto"
186
+ elsif llm.is_a?(Langchain::LLM::Anthropic)
187
+ params[:tools] = tools.map(&:to_anthropic_tools).flatten
188
+ params[:system] = instructions if instructions
189
+ params[:tool_choice] = {type: "auto"}
182
190
  elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
183
191
  params[:tools] = tools.map(&:to_google_gemini_tools).flatten
184
192
  params[:system] = instructions if instructions
193
+ params[:tool_choice] = "auto"
185
194
  end
186
195
  # TODO: Not sure that tool_choice should always be "auto"; Maybe we can let the user toggle it.
187
- params[:tool_choice] = "auto"
188
196
  end
189
197
 
190
198
  llm.chat(**params)
@@ -200,6 +208,8 @@ module Langchain
200
208
  extract_openai_tool_call(tool_call: tool_call)
201
209
  elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
202
210
  extract_google_gemini_tool_call(tool_call: tool_call)
211
+ elsif llm.is_a?(Langchain::LLM::Anthropic)
212
+ extract_anthropic_tool_call(tool_call: tool_call)
203
213
  end
204
214
 
205
215
  tool_instance = tools.find do |t|
@@ -234,6 +244,20 @@ module Langchain
234
244
  [tool_call_id, tool_name, method_name, tool_arguments]
235
245
  end
236
246
 
247
+ # Extract the tool call information from the Anthropic tool call hash
248
+ #
249
+ # @param tool_call [Hash] The tool call hash, format: {"type"=>"tool_use", "id"=>"toolu_01TjusbFApEbwKPRWTRwzadR", "name"=>"news_retriever__get_top_headlines", "input"=>{"country"=>"us", "page_size"=>10}}
250
+ # @return [Array] The tool call information
251
+ def extract_anthropic_tool_call(tool_call:)
252
+ tool_call_id = tool_call.dig("id")
253
+
254
+ function_name = tool_call.dig("name")
255
+ tool_name, method_name = function_name.split("__")
256
+ tool_arguments = tool_call.dig("input").transform_keys(&:to_sym)
257
+
258
+ [tool_call_id, tool_name, method_name, tool_arguments]
259
+ end
260
+
237
261
  # Extract the tool call information from the Google Gemini tool call hash
238
262
  #
239
263
  # @param tool_call [Hash] The tool call hash, format: {"functionCall"=>{"name"=>"weather__execute", "args"=>{"input"=>"NYC"}}}
@@ -260,6 +284,8 @@ module Langchain
260
284
  Langchain::Messages::OpenAIMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
261
285
  elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
262
286
  Langchain::Messages::GoogleGeminiMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
287
+ elsif llm.is_a?(Langchain::LLM::Anthropic)
288
+ Langchain::Messages::AnthropicMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
263
289
  end
264
290
  end
265
291
 
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Messages
5
+ class AnthropicMessage < Base
6
+ ROLES = [
7
+ "assistant",
8
+ "user",
9
+ "tool_result"
10
+ ].freeze
11
+
12
+ TOOL_ROLE = "tool_result"
13
+
14
+ def initialize(role:, content: nil, tool_calls: [], tool_call_id: nil)
15
+ raise ArgumentError, "Role must be one of #{ROLES.join(", ")}" unless ROLES.include?(role)
16
+ raise ArgumentError, "Tool calls must be an array of hashes" unless tool_calls.is_a?(Array) && tool_calls.all? { |tool_call| tool_call.is_a?(Hash) }
17
+
18
+ @role = role
19
+ # Some Tools return content as a JSON hence `.to_s`
20
+ @content = content.to_s
21
+ @tool_calls = tool_calls
22
+ @tool_call_id = tool_call_id
23
+ end
24
+
25
+ # Convert the message to an Anthropic API-compatible hash
26
+ #
27
+ # @return [Hash] The message as an Anthropic API-compatible hash
28
+ def to_hash
29
+ {}.tap do |h|
30
+ h[:role] = tool? ? "user" : role
31
+
32
+ h[:content] = if tool?
33
+ [
34
+ {
35
+ type: "tool_result",
36
+ tool_use_id: tool_call_id,
37
+ content: content
38
+ }
39
+ ]
40
+ elsif tool_calls.any?
41
+ tool_calls
42
+ else
43
+ content
44
+ end
45
+ end
46
+ end
47
+
48
+ # Check if the message is a tool result (the output returned for a tool call)
49
+ #
50
+ # @return [Boolean] true/false whether this message has the "tool_result" role
51
+ def tool?
52
+ role == "tool_result"
53
+ end
54
+
55
+ # Anthropic has no system message role; system instructions are passed via the `system:` param of `chat` instead
56
+ def system?
57
+ false
58
+ end
59
+
60
+ # Check if the message came from an LLM
61
+ #
62
+ # @return [Boolean] true/false whether this message was produced by an LLM
63
+ def assistant?
64
+ role == "assistant"
65
+ end
66
+
67
+ # Check if the message came from an LLM
68
+ #
69
+ # @return [Boolean] true/false whether this message was produced by an LLM
70
+ def llm?
71
+ assistant?
72
+ end
73
+ end
74
+ end
75
+ end
@@ -42,6 +42,8 @@ module Langchain
42
42
 
43
43
  def count_verified_statements(verifications)
44
44
  match = verifications.match(/Final verdict for each statement in order:\s*(.*)/)
45
+ return 0.0 unless match # no verified statements found
46
+
45
47
  verdicts = match.captures.first
46
48
  verdicts
47
49
  .split(".")
@@ -101,6 +101,8 @@ module Langchain::LLM
101
101
  # @option params [Float] :top_p Use nucleus sampling.
102
102
  # @return [Langchain::LLM::AnthropicResponse] The chat completion
103
103
  def chat(params = {})
104
+ set_extra_headers! if params[:tools]
105
+
104
106
  parameters = chat_parameters.to_params(params)
105
107
 
106
108
  raise ArgumentError.new("messages argument is required") if Array(parameters[:messages]).empty?
@@ -111,5 +113,11 @@ module Langchain::LLM
111
113
 
112
114
  Langchain::LLM::AnthropicResponse.new(response)
113
115
  end
116
+
117
+ private
118
+
119
+ def set_extra_headers!
120
+ ::Anthropic.configuration.extra_headers = {"anthropic-beta": "tools-2024-05-16"}
121
+ end
114
122
  end
115
123
  end
@@ -6,6 +6,7 @@ module Langchain::LLM
6
6
  class GoogleGemini < Base
7
7
  DEFAULTS = {
8
8
  chat_completion_model_name: "gemini-1.5-pro-latest",
9
+ embeddings_model_name: "text-embedding-004",
9
10
  temperature: 0.0
10
11
  }
11
12
 
@@ -63,5 +64,35 @@ module Langchain::LLM
63
64
  raise StandardError.new(response)
64
65
  end
65
66
  end
67
+
68
+ def embed(
69
+ text:,
70
+ model: @defaults[:embeddings_model_name]
71
+ )
72
+
73
+ params = {
74
+ content: {
75
+ parts: [
76
+ {
77
+ text: text
78
+ }
79
+ ]
80
+ }
81
+ }
82
+
83
+ uri = URI("https://generativelanguage.googleapis.com/v1beta/models/#{model}:embedContent?key=#{api_key}")
84
+
85
+ request = Net::HTTP::Post.new(uri)
86
+ request.content_type = "application/json"
87
+ request.body = params.to_json
88
+
89
+ response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http|
90
+ http.request(request)
91
+ end
92
+
93
+ parsed_response = JSON.parse(response.body)
94
+
95
+ Langchain::LLM::GoogleGeminiResponse.new(parsed_response, model: model)
96
+ end
66
97
  end
67
98
  end
@@ -28,7 +28,10 @@ module Langchain::LLM
28
28
  def initialize(project_id:, region:, default_options: {})
29
29
  depends_on "googleauth"
30
30
 
31
- @authorizer = ::Google::Auth.get_application_default
31
+ @authorizer = ::Google::Auth.get_application_default(scope: [
32
+ "https://www.googleapis.com/auth/cloud-platform",
33
+ "https://www.googleapis.com/auth/generative-language.retriever"
34
+ ])
32
35
  proj_id = project_id || @authorizer.project_id || @authorizer.quota_project_id
33
36
  @url = "https://#{region}-aiplatform.googleapis.com/v1/projects/#{proj_id}/locations/#{region}/publishers/google/models/"
34
37
 
@@ -58,16 +61,20 @@ module Langchain::LLM
58
61
  )
59
62
  params = {instances: [{content: text}]}
60
63
 
61
- response = HTTParty.post(
62
- "#{url}#{model}:predict",
63
- body: params.to_json,
64
- headers: {
65
- "Content-Type" => "application/json",
66
- "Authorization" => "Bearer #{@authorizer.fetch_access_token!["access_token"]}"
67
- }
68
- )
64
+ uri = URI("#{url}#{model}:predict")
69
65
 
70
- Langchain::LLM::GoogleGeminiResponse.new(response, model: model)
66
+ request = Net::HTTP::Post.new(uri)
67
+ request.content_type = "application/json"
68
+ request["Authorization"] = "Bearer #{@authorizer.fetch_access_token!["access_token"]}"
69
+ request.body = params.to_json
70
+
71
+ response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http|
72
+ http.request(request)
73
+ end
74
+
75
+ parsed_response = JSON.parse(response.body)
76
+
77
+ Langchain::LLM::GoogleGeminiResponse.new(parsed_response, model: model)
71
78
  end
72
79
 
73
80
  # Generate a chat completion for given messages
@@ -81,6 +88,7 @@ module Langchain::LLM
81
88
  def chat(params = {})
82
89
  params[:system] = {parts: [{text: params[:system]}]} if params[:system]
83
90
  params[:tools] = {function_declarations: params[:tools]} if params[:tools]
91
+ # This throws an error when tool_choice is passed
84
92
  params[:tool_choice] = {function_calling_config: {mode: params[:tool_choice].upcase}} if params[:tool_choice]
85
93
 
86
94
  raise ArgumentError.new("messages argument is required") if Array(params[:messages]).empty?
@@ -101,7 +109,13 @@ module Langchain::LLM
101
109
 
102
110
  parsed_response = JSON.parse(response.body)
103
111
 
104
- Langchain::LLM::GoogleGeminiResponse.new(parsed_response, model: parameters[:model])
112
+ wrapped_response = Langchain::LLM::GoogleGeminiResponse.new(parsed_response, model: parameters[:model])
113
+
114
+ if wrapped_response.chat_completion || Array(wrapped_response.tool_calls).any?
115
+ wrapped_response
116
+ else
117
+ raise StandardError.new(response)
118
+ end
105
119
  end
106
120
  end
107
121
  end
@@ -11,12 +11,12 @@ module Langchain::LLM
11
11
  # hf = Langchain::LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
12
12
  #
13
13
  class HuggingFace < Base
14
- # The gem does not currently accept other models:
15
- # https://github.com/alchaplinsky/hugging-face/blob/main/lib/hugging_face/inference_api.rb#L32-L34
16
14
  DEFAULTS = {
17
- temperature: 0.0,
18
- embeddings_model_name: "sentence-transformers/all-MiniLM-L6-v2",
19
- dimensions: 384 # Vector size generated by the above model
15
+ embeddings_model_name: "sentence-transformers/all-MiniLM-L6-v2"
16
+ }.freeze
17
+
18
+ EMBEDDING_SIZES = {
19
+ "sentence-transformers/all-MiniLM-L6-v2": 384
20
20
  }.freeze
21
21
 
22
22
  #
@@ -24,10 +24,21 @@ module Langchain::LLM
24
24
  #
25
25
  # @param api_key [String] The API key to use
26
26
  #
27
- def initialize(api_key:)
27
+ def initialize(api_key:, default_options: {})
28
28
  depends_on "hugging-face", req: "hugging_face"
29
29
 
30
30
  @client = ::HuggingFace::InferenceApi.new(api_token: api_key)
31
+ @defaults = DEFAULTS.merge(default_options)
32
+ end
33
+
34
+ # Returns the # of vector dimensions for the embeddings
35
+ # @return [Integer] The # of vector dimensions
36
+ def default_dimensions
37
+ # Since Hugging Face can run multiple models, look the size up or generate an embedding and return its size
38
+ @default_dimensions ||= @defaults[:dimensions] ||
39
+ EMBEDDING_SIZES.fetch(@defaults[:embeddings_model_name].to_sym) do
40
+ embed(text: "test").embedding.size
41
+ end
31
42
  end
32
43
 
33
44
  #
@@ -39,9 +50,9 @@ module Langchain::LLM
39
50
  def embed(text:)
40
51
  response = client.embedding(
41
52
  input: text,
42
- model: DEFAULTS[:embeddings_model_name]
53
+ model: @defaults[:embeddings_model_name]
43
54
  )
44
- Langchain::LLM::HuggingFaceResponse.new(response, model: DEFAULTS[:embeddings_model_name])
55
+ Langchain::LLM::HuggingFaceResponse.new(response, model: @defaults[:embeddings_model_name])
45
56
  end
46
57
  end
47
58
  end
@@ -11,7 +11,17 @@ module Langchain::LLM
11
11
  end
12
12
 
13
13
  def chat_completion
14
- raw_response.dig("content", 0, "text")
14
+ chat_completion = chat_completions.find { |h| h["type"] == "text" }
15
+ chat_completion.dig("text")
16
+ end
17
+
18
+ def tool_calls
19
+ tool_call = chat_completions.find { |h| h["type"] == "tool_use" }
20
+ tool_call ? [tool_call] : []
21
+ end
22
+
23
+ def chat_completions
24
+ raw_response.dig("content")
15
25
  end
16
26
 
17
27
  def completions
@@ -27,7 +27,11 @@ module Langchain::LLM
27
27
  end
28
28
 
29
29
  def embeddings
30
- [raw_response.dig("predictions", 0, "embeddings", "values")]
30
+ if raw_response.key?("embedding")
31
+ [raw_response.dig("embedding", "values")]
32
+ else
33
+ [raw_response.dig("predictions", 0, "embeddings", "values")]
34
+ end
31
35
  end
32
36
 
33
37
  def prompt_tokens
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class Xls < Base
6
+ EXTENSIONS = [".xls"].freeze
7
+ CONTENT_TYPES = ["application/vnd.ms-excel"].freeze
8
+
9
+ def initialize(*)
10
+ depends_on "roo"
11
+ depends_on "roo-xls"
12
+ end
13
+
14
+ # Parse the document and return the text
15
+ # @param [File] data
16
+ # @return [Array<Array<String>>] Array of rows, each row is an array of cells
17
+ def parse(data)
18
+ xls_file = Roo::Spreadsheet.open(data)
19
+ xls_file.each_with_pagename.flat_map do |_, sheet|
20
+ sheet.map do |row|
21
+ row.map { |i| i.to_s.strip }
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -71,6 +71,18 @@ module Langchain::Tool
71
71
  method_annotations
72
72
  end
73
73
 
74
+ # Returns the tool as a list of Anthropic formatted functions
75
+ #
76
+ # @return [Array<Hash>] List of hashes representing the tool as Anthropic formatted functions
77
+ def to_anthropic_tools
78
+ method_annotations.map do |annotation|
79
+ # Slice out only the content of the "function" key
80
+ annotation["function"]
81
+ # Rename "parameters" to "input_schema" key
82
+ .transform_keys("parameters" => "input_schema")
83
+ end
84
+ end
85
+
74
86
  # Returns the tool as a list of Google Gemini formatted functions
75
87
  #
76
88
  # @return [Array<Hash>] List of hashes representing the tool as Google Gemini formatted functions
@@ -68,7 +68,8 @@
68
68
  "properties": {
69
69
  "country": {
70
70
  "type": "string",
71
- "description": "The 2-letter ISO 3166-1 code of the country you want to get headlines for."
71
+ "description": "The 2-letter ISO 3166-1 code of the country you want to get headlines for.",
72
+ "enum": ["ae", "ar", "at", "au", "be", "bg", "br", "ca", "ch", "cn", "co", "cu", "cz", "de", "eg", "fr", "gb", "gr", "hk", "hu", "id", "ie", "il", "in", "it", "jp", "kr", "lt", "lv", "ma", "mx", "my", "ng", "nl", "no", "nz", "ph", "pl", "pt", "ro", "rs", "ru", "sa", "se", "sg", "si", "sk", "th", "tr", "tw", "ua", "us", "ve", "za"]
72
73
  },
73
74
  "category": {
74
75
  "type": "string",
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.13.0"
4
+ VERSION = "0.13.2"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.0
4
+ version: 0.13.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-05-14 00:00:00.000000000 Z
11
+ date: 2024-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -598,6 +598,20 @@ dependencies:
598
598
  - - "~>"
599
599
  - !ruby/object:Gem::Version
600
600
  version: 2.10.0
601
+ - !ruby/object:Gem::Dependency
602
+ name: roo-xls
603
+ requirement: !ruby/object:Gem::Requirement
604
+ requirements:
605
+ - - "~>"
606
+ - !ruby/object:Gem::Version
607
+ version: 1.2.0
608
+ type: :development
609
+ prerelease: false
610
+ version_requirements: !ruby/object:Gem::Requirement
611
+ requirements:
612
+ - - "~>"
613
+ - !ruby/object:Gem::Version
614
+ version: 1.2.0
601
615
  - !ruby/object:Gem::Dependency
602
616
  name: ruby-openai
603
617
  requirement: !ruby/object:Gem::Requirement
@@ -708,6 +722,7 @@ files:
708
722
  - README.md
709
723
  - lib/langchain.rb
710
724
  - lib/langchain/assistants/assistant.rb
725
+ - lib/langchain/assistants/messages/anthropic_message.rb
711
726
  - lib/langchain/assistants/messages/base.rb
712
727
  - lib/langchain/assistants/messages/google_gemini_message.rb
713
728
  - lib/langchain/assistants/messages/openai_message.rb
@@ -779,6 +794,7 @@ files:
779
794
  - lib/langchain/processors/pdf.rb
780
795
  - lib/langchain/processors/pptx.rb
781
796
  - lib/langchain/processors/text.rb
797
+ - lib/langchain/processors/xls.rb
782
798
  - lib/langchain/processors/xlsx.rb
783
799
  - lib/langchain/prompt.rb
784
800
  - lib/langchain/prompt/base.rb