langchainrb 0.13.1 → 0.13.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 31daa3b09f92561f783122c10c1b48482bba75eac67e01550c71f7d76af36551
4
- data.tar.gz: 355e21f33fbc3d21ac364ce046b0d2908ef111d2aa17996605df953ca25d0640
3
+ metadata.gz: 30a8890d61255a8d937f2e2996f2b32de8ff432697a4a946afb08e853d383e2a
4
+ data.tar.gz: ee561cd2fac6b60308c07bfa300f09b6d1f8092ca669f2712f81a5b36814dd31
5
5
  SHA512:
6
- metadata.gz: f2bbf794a223f9b0da303f9b65a1a309213db00d45227ce6e9d5a9bc039d1150e06b786ff9730c1e4f2f2fd6d6566687d4a04d3c39f5dcd8d9e66c8e84e097ba
7
- data.tar.gz: b406738ff1be88c7c545ec284d3050a3b5c0bb34a747f345ff18cbaeb63a3abf9763ec723913bd58ddd62be261c6abd88a87448fd2b9d3bde00eb53d795931e2
6
+ metadata.gz: 3ab2bf6b1e57754497165b931e366d9c46051d829d3c05f23f73d3f20017eb91c86bf586cc318e7329e7990ae69002b0ad16638f5f41b3b0d141560a56c7236a
7
+ data.tar.gz: e621507bece6ff42ee80788bb9fb486dbd685b33d0f3893a9affca3d2edaf2a55441e98278bb056b290dcb2102d12a6eac5faea1c29d36a6e6f61118c43ee121
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.13.2] - 2024-05-20
4
+ - New `Langchain::LLM::GoogleGemini#embed()` method
5
+ - `Langchain::Assistant` works with `Langchain::LLM::Anthropic` llm
6
+ - New XLS file processor
7
+ - Fixes and improvements
8
+
3
9
  ## [0.13.1] - 2024-05-14
4
10
  - Better error handling for `Langchain::LLM::GoogleVertexAI`
5
11
 
@@ -8,6 +8,7 @@ module Langchain
8
8
  attr_accessor :tools
9
9
 
10
10
  SUPPORTED_LLMS = [
11
+ Langchain::LLM::Anthropic,
11
12
  Langchain::LLM::OpenAI,
12
13
  Langchain::LLM::GoogleGemini,
13
14
  Langchain::LLM::GoogleVertexAI
@@ -41,7 +42,7 @@ module Langchain
41
42
  if llm.is_a?(Langchain::LLM::OpenAI)
42
43
  add_message(role: "system", content: instructions) if instructions
43
44
  end
44
- # For Google Gemini, system instructions are added to the `system:` param in the `chat` method
45
+ # For Google Gemini and Anthropic, system instructions are added to the `system:` param in the `chat` method
45
46
  end
46
47
 
47
48
  # Add a user message to the thread
@@ -137,6 +138,8 @@ module Langchain
137
138
  Langchain::Messages::OpenAIMessage::TOOL_ROLE
138
139
  elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
139
140
  Langchain::Messages::GoogleGeminiMessage::TOOL_ROLE
141
+ elsif llm.is_a?(Langchain::LLM::Anthropic)
142
+ Langchain::Messages::AnthropicMessage::TOOL_ROLE
140
143
  end
141
144
 
142
145
  # TODO: Validate that `tool_call_id` is valid by scanning messages and checking if this tool call ID was invoked
@@ -179,12 +182,17 @@ module Langchain
179
182
  if tools.any?
180
183
  if llm.is_a?(Langchain::LLM::OpenAI)
181
184
  params[:tools] = tools.map(&:to_openai_tools).flatten
185
+ params[:tool_choice] = "auto"
186
+ elsif llm.is_a?(Langchain::LLM::Anthropic)
187
+ params[:tools] = tools.map(&:to_anthropic_tools).flatten
188
+ params[:system] = instructions if instructions
189
+ params[:tool_choice] = {type: "auto"}
182
190
  elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
183
191
  params[:tools] = tools.map(&:to_google_gemini_tools).flatten
184
192
  params[:system] = instructions if instructions
193
+ params[:tool_choice] = "auto"
185
194
  end
186
195
  # TODO: Not sure that tool_choice should always be "auto"; Maybe we can let the user toggle it.
187
- params[:tool_choice] = "auto"
188
196
  end
189
197
 
190
198
  llm.chat(**params)
@@ -200,6 +208,8 @@ module Langchain
200
208
  extract_openai_tool_call(tool_call: tool_call)
201
209
  elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
202
210
  extract_google_gemini_tool_call(tool_call: tool_call)
211
+ elsif llm.is_a?(Langchain::LLM::Anthropic)
212
+ extract_anthropic_tool_call(tool_call: tool_call)
203
213
  end
204
214
 
205
215
  tool_instance = tools.find do |t|
@@ -234,6 +244,20 @@ module Langchain
234
244
  [tool_call_id, tool_name, method_name, tool_arguments]
235
245
  end
236
246
 
247
+ # Extract the tool call information from the Anthropic tool call hash
248
+ #
249
+ # @param tool_call [Hash] The tool call hash, format: {"type"=>"tool_use", "id"=>"toolu_01TjusbFApEbwKPRWTRwzadR", "name"=>"news_retriever__get_top_headlines", "input"=>{"country"=>"us", "page_size"=>10}}
250
+ # @return [Array] The tool call information
251
+ def extract_anthropic_tool_call(tool_call:)
252
+ tool_call_id = tool_call.dig("id")
253
+
254
+ function_name = tool_call.dig("name")
255
+ tool_name, method_name = function_name.split("__")
256
+ tool_arguments = tool_call.dig("input").transform_keys(&:to_sym)
257
+
258
+ [tool_call_id, tool_name, method_name, tool_arguments]
259
+ end
260
+
237
261
  # Extract the tool call information from the Google Gemini tool call hash
238
262
  #
239
263
  # @param tool_call [Hash] The tool call hash, format: {"functionCall"=>{"name"=>"weather__execute", "args"=>{"input"=>"NYC"}}}
@@ -260,6 +284,8 @@ module Langchain
260
284
  Langchain::Messages::OpenAIMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
261
285
  elsif [Langchain::LLM::GoogleGemini, Langchain::LLM::GoogleVertexAI].include?(llm.class)
262
286
  Langchain::Messages::GoogleGeminiMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
287
+ elsif llm.is_a?(Langchain::LLM::Anthropic)
288
+ Langchain::Messages::AnthropicMessage.new(role: role, content: content, tool_calls: tool_calls, tool_call_id: tool_call_id)
263
289
  end
264
290
  end
265
291
 
@@ -0,0 +1,75 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Messages
5
+ class AnthropicMessage < Base
6
+ ROLES = [
7
+ "assistant",
8
+ "user",
9
+ "tool_result"
10
+ ].freeze
11
+
12
+ TOOL_ROLE = "tool_result"
13
+
14
+ def initialize(role:, content: nil, tool_calls: [], tool_call_id: nil)
15
+ raise ArgumentError, "Role must be one of #{ROLES.join(", ")}" unless ROLES.include?(role)
16
+ raise ArgumentError, "Tool calls must be an array of hashes" unless tool_calls.is_a?(Array) && tool_calls.all? { |tool_call| tool_call.is_a?(Hash) }
17
+
18
+ @role = role
19
+ # Some Tools return content as JSON, hence `.to_s`
20
+ @content = content.to_s
21
+ @tool_calls = tool_calls
22
+ @tool_call_id = tool_call_id
23
+ end
24
+
25
+ # Convert the message to an Anthropic API-compatible hash
26
+ #
27
+ # @return [Hash] The message as an Anthropic API-compatible hash
28
+ def to_hash
29
+ {}.tap do |h|
30
+ h[:role] = tool? ? "user" : role
31
+
32
+ h[:content] = if tool?
33
+ [
34
+ {
35
+ type: "tool_result",
36
+ tool_use_id: tool_call_id,
37
+ content: content
38
+ }
39
+ ]
40
+ elsif tool_calls.any?
41
+ tool_calls
42
+ else
43
+ content
44
+ end
45
+ end
46
+ end
47
+
48
+ # Check if the message is a tool call
49
+ #
50
+ # @return [Boolean] true/false whether this message is a tool call
51
+ def tool?
52
+ role == "tool_result"
53
+ end
54
+
55
+ # Anthropic does not implement system prompts
56
+ def system?
57
+ false
58
+ end
59
+
60
+ # Check if the message came from an LLM
61
+ #
62
+ # @return [Boolean] true/false whether this message was produced by an LLM
63
+ def assistant?
64
+ role == "assistant"
65
+ end
66
+
67
+ # Check if the message came from an LLM
68
+ #
69
+ # @return [Boolean] true/false whether this message was produced by an LLM
70
+ def llm?
71
+ assistant?
72
+ end
73
+ end
74
+ end
75
+ end
@@ -101,6 +101,8 @@ module Langchain::LLM
101
101
  # @option params [Float] :top_p Use nucleus sampling.
102
102
  # @return [Langchain::LLM::AnthropicResponse] The chat completion
103
103
  def chat(params = {})
104
+ set_extra_headers! if params[:tools]
105
+
104
106
  parameters = chat_parameters.to_params(params)
105
107
 
106
108
  raise ArgumentError.new("messages argument is required") if Array(parameters[:messages]).empty?
@@ -111,5 +113,11 @@ module Langchain::LLM
111
113
 
112
114
  Langchain::LLM::AnthropicResponse.new(response)
113
115
  end
116
+
117
+ private
118
+
119
+ def set_extra_headers!
120
+ ::Anthropic.configuration.extra_headers = {"anthropic-beta": "tools-2024-05-16"}
121
+ end
114
122
  end
115
123
  end
@@ -6,6 +6,7 @@ module Langchain::LLM
6
6
  class GoogleGemini < Base
7
7
  DEFAULTS = {
8
8
  chat_completion_model_name: "gemini-1.5-pro-latest",
9
+ embeddings_model_name: "text-embedding-004",
9
10
  temperature: 0.0
10
11
  }
11
12
 
@@ -63,5 +64,35 @@ module Langchain::LLM
63
64
  raise StandardError.new(response)
64
65
  end
65
66
  end
67
+
68
+ def embed(
69
+ text:,
70
+ model: @defaults[:embeddings_model_name]
71
+ )
72
+
73
+ params = {
74
+ content: {
75
+ parts: [
76
+ {
77
+ text: text
78
+ }
79
+ ]
80
+ }
81
+ }
82
+
83
+ uri = URI("https://generativelanguage.googleapis.com/v1beta/models/#{model}:embedContent?key=#{api_key}")
84
+
85
+ request = Net::HTTP::Post.new(uri)
86
+ request.content_type = "application/json"
87
+ request.body = params.to_json
88
+
89
+ response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == "https") do |http|
90
+ http.request(request)
91
+ end
92
+
93
+ parsed_response = JSON.parse(response.body)
94
+
95
+ Langchain::LLM::GoogleGeminiResponse.new(parsed_response, model: model)
96
+ end
66
97
  end
67
98
  end
@@ -28,7 +28,10 @@ module Langchain::LLM
28
28
  def initialize(project_id:, region:, default_options: {})
29
29
  depends_on "googleauth"
30
30
 
31
- @authorizer = ::Google::Auth.get_application_default
31
+ @authorizer = ::Google::Auth.get_application_default(scope: [
32
+ "https://www.googleapis.com/auth/cloud-platform",
33
+ "https://www.googleapis.com/auth/generative-language.retriever"
34
+ ])
32
35
  proj_id = project_id || @authorizer.project_id || @authorizer.quota_project_id
33
36
  @url = "https://#{region}-aiplatform.googleapis.com/v1/projects/#{proj_id}/locations/#{region}/publishers/google/models/"
34
37
 
@@ -11,12 +11,12 @@ module Langchain::LLM
11
11
  # hf = Langchain::LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
12
12
  #
13
13
  class HuggingFace < Base
14
- # The gem does not currently accept other models:
15
- # https://github.com/alchaplinsky/hugging-face/blob/main/lib/hugging_face/inference_api.rb#L32-L34
16
14
  DEFAULTS = {
17
- temperature: 0.0,
18
- embeddings_model_name: "sentence-transformers/all-MiniLM-L6-v2",
19
- dimensions: 384 # Vector size generated by the above model
15
+ embeddings_model_name: "sentence-transformers/all-MiniLM-L6-v2"
16
+ }.freeze
17
+
18
+ EMBEDDING_SIZES = {
19
+ "sentence-transformers/all-MiniLM-L6-v2": 384
20
20
  }.freeze
21
21
 
22
22
  #
@@ -24,10 +24,21 @@ module Langchain::LLM
24
24
  #
25
25
  # @param api_key [String] The API key to use
26
26
  #
27
- def initialize(api_key:)
27
+ def initialize(api_key:, default_options: {})
28
28
  depends_on "hugging-face", req: "hugging_face"
29
29
 
30
30
  @client = ::HuggingFace::InferenceApi.new(api_token: api_key)
31
+ @defaults = DEFAULTS.merge(default_options)
32
+ end
33
+
34
+ # Returns the # of vector dimensions for the embeddings
35
+ # @return [Integer] The # of vector dimensions
36
+ def default_dimensions
37
+ # since Hugging Face can run multiple models, look it up or generate an embedding and return the size
38
+ @default_dimensions ||= @defaults[:dimensions] ||
39
+ EMBEDDING_SIZES.fetch(@defaults[:embeddings_model_name].to_sym) do
40
+ embed(text: "test").embedding.size
41
+ end
31
42
  end
32
43
 
33
44
  #
@@ -39,9 +50,9 @@ module Langchain::LLM
39
50
  def embed(text:)
40
51
  response = client.embedding(
41
52
  input: text,
42
- model: DEFAULTS[:embeddings_model_name]
53
+ model: @defaults[:embeddings_model_name]
43
54
  )
44
- Langchain::LLM::HuggingFaceResponse.new(response, model: DEFAULTS[:embeddings_model_name])
55
+ Langchain::LLM::HuggingFaceResponse.new(response, model: @defaults[:embeddings_model_name])
45
56
  end
46
57
  end
47
58
  end
@@ -11,7 +11,17 @@ module Langchain::LLM
11
11
  end
12
12
 
13
13
  def chat_completion
14
- raw_response.dig("content", 0, "text")
14
+ chat_completion = chat_completions.find { |h| h["type"] == "text" }
15
+ chat_completion.dig("text")
16
+ end
17
+
18
+ def tool_calls
19
+ tool_call = chat_completions.find { |h| h["type"] == "tool_use" }
20
+ tool_call ? [tool_call] : []
21
+ end
22
+
23
+ def chat_completions
24
+ raw_response.dig("content")
15
25
  end
16
26
 
17
27
  def completions
@@ -27,7 +27,11 @@ module Langchain::LLM
27
27
  end
28
28
 
29
29
  def embeddings
30
- [raw_response.dig("predictions", 0, "embeddings", "values")]
30
+ if raw_response.key?("embedding")
31
+ [raw_response.dig("embedding", "values")]
32
+ else
33
+ [raw_response.dig("predictions", 0, "embeddings", "values")]
34
+ end
31
35
  end
32
36
 
33
37
  def prompt_tokens
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Processors
5
+ class Xls < Base
6
+ EXTENSIONS = [".xls"].freeze
7
+ CONTENT_TYPES = ["application/vnd.ms-excel"].freeze
8
+
9
+ def initialize(*)
10
+ depends_on "roo"
11
+ depends_on "roo-xls"
12
+ end
13
+
14
+ # Parse the document and return the text
15
+ # @param [File] data
16
+ # @return [Array<Array<String>>] Array of rows, each row is an array of cells
17
+ def parse(data)
18
+ xls_file = Roo::Spreadsheet.open(data)
19
+ xls_file.each_with_pagename.flat_map do |_, sheet|
20
+ sheet.map do |row|
21
+ row.map { |i| i.to_s.strip }
22
+ end
23
+ end
24
+ end
25
+ end
26
+ end
27
+ end
@@ -71,6 +71,18 @@ module Langchain::Tool
71
71
  method_annotations
72
72
  end
73
73
 
74
+ # Returns the tool as a list of Anthropic formatted functions
75
+ #
76
+ # @return [Array<Hash>] List of hashes representing the tool as Anthropic formatted functions
77
+ def to_anthropic_tools
78
+ method_annotations.map do |annotation|
79
+ # Slice out only the content of the "function" key
80
+ annotation["function"]
81
+ # Rename "parameters" to "input_schema" key
82
+ .transform_keys("parameters" => "input_schema")
83
+ end
84
+ end
85
+
74
86
  # Returns the tool as a list of Google Gemini formatted functions
75
87
  #
76
88
  # @return [Array<Hash>] List of hashes representing the tool as Google Gemini formatted functions
@@ -68,7 +68,8 @@
68
68
  "properties": {
69
69
  "country": {
70
70
  "type": "string",
71
- "description": "The 2-letter ISO 3166-1 code of the country you want to get headlines for."
71
+ "description": "The 2-letter ISO 3166-1 code of the country you want to get headlines for.",
72
+ "enum": ["ae", "ar", "at", "au", "be", "bg", "br", "ca", "ch", "cn", "co", "cu", "cz", "de", "eg", "fr", "gb", "gr", "hk", "hu", "id", "ie", "il", "in", "it", "jp", "kr", "lt", "lv", "ma", "mx", "my", "ng", "nl", "no", "nz", "ph", "pl", "pt", "ro", "rs", "ru", "sa", "se", "sg", "si", "sk", "th", "tr", "tw", "ua", "us", "ve", "za"]
72
73
  },
73
74
  "category": {
74
75
  "type": "string",
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.13.1"
4
+ VERSION = "0.13.2"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.13.1
4
+ version: 0.13.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-05-15 00:00:00.000000000 Z
11
+ date: 2024-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -598,6 +598,20 @@ dependencies:
598
598
  - - "~>"
599
599
  - !ruby/object:Gem::Version
600
600
  version: 2.10.0
601
+ - !ruby/object:Gem::Dependency
602
+ name: roo-xls
603
+ requirement: !ruby/object:Gem::Requirement
604
+ requirements:
605
+ - - "~>"
606
+ - !ruby/object:Gem::Version
607
+ version: 1.2.0
608
+ type: :development
609
+ prerelease: false
610
+ version_requirements: !ruby/object:Gem::Requirement
611
+ requirements:
612
+ - - "~>"
613
+ - !ruby/object:Gem::Version
614
+ version: 1.2.0
601
615
  - !ruby/object:Gem::Dependency
602
616
  name: ruby-openai
603
617
  requirement: !ruby/object:Gem::Requirement
@@ -708,6 +722,7 @@ files:
708
722
  - README.md
709
723
  - lib/langchain.rb
710
724
  - lib/langchain/assistants/assistant.rb
725
+ - lib/langchain/assistants/messages/anthropic_message.rb
711
726
  - lib/langchain/assistants/messages/base.rb
712
727
  - lib/langchain/assistants/messages/google_gemini_message.rb
713
728
  - lib/langchain/assistants/messages/openai_message.rb
@@ -779,6 +794,7 @@ files:
779
794
  - lib/langchain/processors/pdf.rb
780
795
  - lib/langchain/processors/pptx.rb
781
796
  - lib/langchain/processors/text.rb
797
+ - lib/langchain/processors/xls.rb
782
798
  - lib/langchain/processors/xlsx.rb
783
799
  - lib/langchain/prompt.rb
784
800
  - lib/langchain/prompt/base.rb