langchainrb 0.11.1 → 0.11.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: '08b38ee39600716a9854a387fc0b54091290f27d7afa88b276a480651049bdd4'
- data.tar.gz: 69d4292c129e751d9d4001912f5ab54ba23f9d9944a5e4294f4ba1dd426d401e
+ metadata.gz: c678ae75bc25b0501223f5b6ffd396a9159af4d0ddd87ddb1657429ed2ba24ce
+ data.tar.gz: df50ef0a6d9c1a3100153a06084556cac983069d1a38739bd6606f39f63bd332
  SHA512:
- metadata.gz: 3f7d4403d11076fcff2975c88a236fa8b6ace079d83b774000af1d59c08f36794057eaef56769a51bb8e21ab462bba9dbf9df60d704f285b788af8a67b6977ea
- data.tar.gz: 5d14c72130cada67dadfe880b2a0d723b312d8c27e7ef46452aa4ad65dce1055b66d8459b24c4b6d3f1adb5eeacf30c9fdc686b0190809ef051d393c33d3d33d
+ metadata.gz: 3ec9f92f4c6221184b7a0a2c118caa6a56e7bc8505a83d5b5acb4daeb769ff90d8822b43c28f57adc06435dd2df5577268721345c4061d3dad6ecb919be18efc
+ data.tar.gz: 53d54b0c6a82082438f2e2f1ca70d097a9b916bc283b72e52ce466b6f012c9624cf094586b17e93eaa49a796bf9911051d3f4b494b9ecc93c3ac6ee6cdc7e8fe
data/CHANGELOG.md CHANGED
@@ -1,6 +1,17 @@
  ## [Unreleased]
+ - New `Langchain::Processors::Pptx` to parse .pptx files
+ - New `Langchain::LLM::Anthropic#chat()` support
+ - Misc fixes
 
- ## [0.11.0]
+ ## [0.11.3]
+ - New `Langchain::Processors::Pptx` to parse .pptx files
+ - New `Langchain::LLM::Anthropic#chat()` support
+ - Misc fixes
+
+ ## [0.11.2]
+ - New `Langchain::Assistant#clear_thread!` and `Langchain::Assistant#instructions=` methods
+
+ ## [0.11.1]
  - Langchain::Tool::Vectorsearch that wraps Langchain::Vectorsearch::* classes. This allows the Assistant to call the tool and inject data from vector DBs.
 
  ## [0.11.0]
data/README.md CHANGED
@@ -59,7 +59,7 @@ Langchain.rb wraps supported LLMs in a unified interface allowing you to easily
  | -------- |:------------------:| :-------: | :-----------------: | :-------: | :----------------- |
  | [OpenAI](https://openai.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ❌ | Including Azure OpenAI |
  | [AI21](https://ai21.com/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ | ❌ | ✅ | |
- | [Anthropic](https://anthropic.com/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ | | ❌ | |
+ | [Anthropic](https://anthropic.com/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ | ✅ | ❌ | |
  | [AWS Bedrock](https://aws.amazon.com/bedrock?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ❌ | ❌ | Provides AWS, Cohere, AI21, Anthropic and Stability AI models |
  | [Cohere](https://cohere.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |
  | [GooglePalm](https://ai.google/discover/palm2?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |
@@ -372,7 +372,7 @@ my_docx = Langchain.root.join("path/to/my.docx")
 
  client.add_data(paths: [my_pdf, my_text, my_docx])
  ```
- Supported file formats: docx, html, pdf, text, json, jsonl, csv, xlsx, eml.
+ Supported file formats: docx, html, pdf, text, json, jsonl, csv, xlsx, eml, pptx.
 
  Retrieve similar documents based on the query string passed in:
  ```ruby
data/lib/langchain/assistants/assistant.rb CHANGED
@@ -125,12 +125,37 @@ module Langchain
  add_message(role: "tool", content: output, tool_call_id: tool_call_id)
  end
 
+ # Delete all messages in the thread
+ #
+ # @return [Array] Empty messages array
+ def clear_thread!
+ # TODO: Is this a bug? Should we keep the "system" message?
+ thread.messages = []
+ end
+
+ # Set new instructions
+ #
+ # @param [String] New instructions that will be set as a system message
+ # @return [Array<Langchain::Message>] The messages in the thread
+ def instructions=(new_instructions)
+ @instructions = new_instructions
+
+ # Find the message with role: "system" in thread.messages and delete it from the thread.messages array
+ thread.messages.delete_if(&:system?)
+
+ # Set the new instructions by adding a new system message
+ message = build_message(role: "system", content: new_instructions)
+ thread.messages.unshift(message)
+ end
+
  private
 
  # Call to the LLM#chat() method
  #
  # @return [Langchain::LLM::BaseResponse] The LLM response object
  def chat_with_llm
+ Langchain.logger.info("Sending a call to #{llm.class}", for: self.class)
+
  params = {messages: thread.openai_messages}
 
  if tools.any?
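For reviewers, here is roughly how the two new `Assistant` methods from 0.11.2 are used. A minimal sketch; the OpenAI LLM and the instruction strings are illustrative, not part of this diff:

```ruby
require "langchain"

llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
assistant = Langchain::Assistant.new(
  llm: llm,
  thread: Langchain::Thread.new,
  instructions: "You are a helpful assistant"
)

# Replaces the current "system" message: the old one is deleted
# and the new instructions are prepended to the thread.
assistant.instructions = "You are a meticulous code reviewer"

# Empties thread.messages entirely; per the TODO above, this also
# drops the "system" message.
assistant.clear_thread!
```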
data/lib/langchain/llm/anthropic.rb CHANGED
@@ -14,12 +14,19 @@ module Langchain::LLM
  DEFAULTS = {
  temperature: 0.0,
  completion_model_name: "claude-2",
+ chat_completion_model_name: "claude-3-sonnet-20240229",
  max_tokens_to_sample: 256
  }.freeze
 
  # TODO: Implement token length validator for Anthropic
  # LENGTH_VALIDATOR = Langchain::Utils::TokenLength::AnthropicValidator
 
+ # Initialize an Anthropic LLM instance
+ #
+ # @param api_key [String] The API key to use
+ # @param llm_options [Hash] Options to pass to the Anthropic client
+ # @param default_options [Hash] Default options to use on every call to LLM, e.g.: { temperature:, completion_model_name:, chat_completion_model_name:, max_tokens_to_sample: }
+ # @return [Langchain::LLM::Anthropic] Langchain::LLM::Anthropic instance
  def initialize(api_key:, llm_options: {}, default_options: {})
  depends_on "anthropic"
 
@@ -27,17 +34,43 @@ module Langchain::LLM
  @defaults = DEFAULTS.merge(default_options)
  end
 
- #
  # Generate a completion for a given prompt
  #
- # @param prompt [String] The prompt to generate a completion for
- # @param params [Hash] extra parameters passed to Anthropic::Client#complete
+ # @param prompt [String] Prompt to generate a completion for
+ # @param model [String] The model to use
+ # @param max_tokens_to_sample [Integer] The maximum number of tokens to sample
+ # @param stop_sequences [Array<String>] The stop sequences to use
+ # @param temperature [Float] The temperature to use
+ # @param top_p [Float] The top p value to use
+ # @param top_k [Integer] The top k value to use
+ # @param metadata [Hash] The metadata to use
+ # @param stream [Boolean] Whether to stream the response
  # @return [Langchain::LLM::AnthropicResponse] The completion
- #
- def complete(prompt:, **params)
- parameters = compose_parameters @defaults[:completion_model_name], params
+ def complete(
+ prompt:,
+ model: @defaults[:completion_model_name],
+ max_tokens_to_sample: @defaults[:max_tokens_to_sample],
+ stop_sequences: nil,
+ temperature: @defaults[:temperature],
+ top_p: nil,
+ top_k: nil,
+ metadata: nil,
+ stream: nil
+ )
+ raise ArgumentError.new("model argument is required") if model.empty?
+ raise ArgumentError.new("max_tokens_to_sample argument is required") if max_tokens_to_sample.nil?
 
- parameters[:prompt] = prompt
+ parameters = {
+ model: model,
+ prompt: prompt,
+ max_tokens_to_sample: max_tokens_to_sample,
+ temperature: temperature
+ }
+ parameters[:stop_sequences] = stop_sequences if stop_sequences
+ parameters[:top_p] = top_p if top_p
+ parameters[:top_k] = top_k if top_k
+ parameters[:metadata] = metadata if metadata
+ parameters[:stream] = stream if stream
 
  # TODO: Implement token length validator for Anthropic
  # parameters[:max_tokens_to_sample] = validate_max_tokens(prompt, parameters[:completion_model_name])
@@ -46,12 +79,54 @@ module Langchain::LLM
  Langchain::LLM::AnthropicResponse.new(response)
  end
 
- private
+ # Generate a chat completion for given messages
+ #
+ # @param messages [Array<String>] Input messages
+ # @param model [String] The model that will complete your prompt
+ # @param max_tokens [Integer] Maximum number of tokens to generate before stopping
+ # @param metadata [Hash] Object describing metadata about the request
+ # @param stop_sequences [Array<String>] Custom text sequences that will cause the model to stop generating
+ # @param stream [Boolean] Whether to incrementally stream the response using server-sent events
+ # @param system [String] System prompt
+ # @param temperature [Float] Amount of randomness injected into the response
+ # @param tools [Array<String>] Definitions of tools that the model may use
+ # @param top_k [Integer] Only sample from the top K options for each subsequent token
+ # @param top_p [Float] Use nucleus sampling
+ # @return [Langchain::LLM::AnthropicResponse] The chat completion
+ def chat(
+ messages: [],
+ model: @defaults[:chat_completion_model_name],
+ max_tokens: @defaults[:max_tokens_to_sample],
+ metadata: nil,
+ stop_sequences: nil,
+ stream: nil,
+ system: nil,
+ temperature: @defaults[:temperature],
+ tools: [],
+ top_k: nil,
+ top_p: nil
+ )
+ raise ArgumentError.new("messages argument is required") if messages.empty?
+ raise ArgumentError.new("model argument is required") if model.empty?
+ raise ArgumentError.new("max_tokens argument is required") if max_tokens.nil?
+
+ parameters = {
+ messages: messages,
+ model: model,
+ max_tokens: max_tokens,
+ temperature: temperature
+ }
+ parameters[:metadata] = metadata if metadata
+ parameters[:stop_sequences] = stop_sequences if stop_sequences
+ parameters[:stream] = stream if stream
+ parameters[:system] = system if system
+ parameters[:tools] = tools if tools.any?
+ parameters[:top_k] = top_k if top_k
+ parameters[:top_p] = top_p if top_p
 
- def compose_parameters(model, params)
- default_params = {model: model}.merge(@defaults.except(:completion_model_name))
+ response = client.messages(parameters: parameters)
 
- default_params.merge(params)
+ Langchain::LLM::AnthropicResponse.new(response)
  end
 
  # TODO: Implement token length validator for Anthropic
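Taken together with the new `chat_completion_model_name` default, the new `chat` method can be exercised like this. A hedged sketch; the prompt and token limit are illustrative:

```ruby
require "langchain"

llm = Langchain::LLM::Anthropic.new(api_key: ENV["ANTHROPIC_API_KEY"])

response = llm.chat(
  messages: [{role: "user", content: "Name three Ruby web frameworks."}],
  max_tokens: 256
)

response.chat_completion # => reply text, via the response changes below
response.total_tokens    # => prompt_tokens + completion_tokens
```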
data/lib/langchain/llm/openai.rb CHANGED
@@ -54,7 +54,7 @@ module Langchain::LLM
  model: defaults[:embeddings_model_name],
  encoding_format: nil,
  user: nil,
- dimensions: nil
+ dimensions: @defaults[:dimensions]
  )
  raise ArgumentError.new("text argument is required") if text.empty?
  raise ArgumentError.new("model argument is required") if model.empty?
@@ -100,7 +100,7 @@ module Langchain::LLM
  end
  # rubocop:enable Style/ArgumentsForwarding
 
- # Generate a chat completion for a given prompt or messages.
+ # Generate a chat completion for given messages.
  #
  # @param messages [Array<Hash>] List of messages comprising the conversation so far
  # @param model [String] ID of the model to use
@@ -185,7 +185,7 @@ module Langchain::LLM
  end
 
  def default_dimension
- @defaults[:dimension] || EMBEDDING_SIZES.fetch(defaults[:embeddings_model_name])
+ @defaults[:dimensions] || EMBEDDING_SIZES.fetch(defaults[:embeddings_model_name])
  end
 
  private
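The two `dimensions` fixes line up: `embed` now falls back to the configured default, and `default_dimension` reads the correctly spelled `:dimensions` key. A sketch; the model name and value are illustrative:

```ruby
llm = Langchain::LLM::OpenAI.new(
  api_key: ENV["OPENAI_API_KEY"],
  default_options: {
    embeddings_model_name: "text-embedding-3-small",
    dimensions: 512 # previously read as :dimension by default_dimension and ignored by embed
  }
)

# embed() now defaults dimensions: to @defaults[:dimensions] instead of nil
llm.embed(text: "Ruby is a programmer's best friend")
```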
data/lib/langchain/llm/response/anthropic_response.rb CHANGED
@@ -10,6 +10,10 @@ module Langchain::LLM
  completions.first
  end
 
+ def chat_completion
+ raw_response.dig("content", 0, "text")
+ end
+
  def completions
  [raw_response.dig("completion")]
  end
@@ -26,8 +30,20 @@ module Langchain::LLM
  raw_response.dig("log_id")
  end
 
+ def prompt_tokens
+ raw_response.dig("usage", "input_tokens").to_i
+ end
+
+ def completion_tokens
+ raw_response.dig("usage", "output_tokens").to_i
+ end
+
+ def total_tokens
+ prompt_tokens + completion_tokens
+ end
+
  def role
- "assistant"
+ raw_response.dig("role")
  end
  end
  end
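For reference, the new accessors map onto an Anthropic Messages API payload roughly as follows; the response body below is illustrative:

```ruby
raw_response = {
  "role" => "assistant",
  "content" => [{"type" => "text", "text" => "Hello!"}],
  "usage" => {"input_tokens" => 12, "output_tokens" => 5}
}

response = Langchain::LLM::AnthropicResponse.new(raw_response)
response.chat_completion # => "Hello!"
response.role            # => "assistant" (read from the payload, no longer hardcoded)
response.total_tokens    # => 17
```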
data/lib/langchain/processors/csv.rb CHANGED
@@ -5,15 +5,26 @@ require "csv"
  module Langchain
  module Processors
  class CSV < Base
+ class InvalidChunkMode < StandardError; end
+
  EXTENSIONS = [".csv"]
  CONTENT_TYPES = ["text/csv"]
+ CHUNK_MODE = {
+ row: "row",
+ file: "file"
+ }
 
  # Parse the document and return the text
  # @param [File] data
- # @return [Array of Hash]
+ # @return [String]
  def parse(data)
- ::CSV.new(data.read, col_sep: separator).map do |row|
- row.map(&:strip)
+ case chunk_mode
+ when CHUNK_MODE[:row]
+ chunk_row(data)
+ when CHUNK_MODE[:file]
+ chunk_file(data)
+ else
+ raise InvalidChunkMode
  end
  end
 
@@ -22,6 +33,29 @@ module Langchain
  def separator
  @options[:col_sep] || ","
  end
+
+ def chunk_mode
+ if @options[:chunk_mode].to_s.empty?
+ CHUNK_MODE[:row]
+ else
+ raise InvalidChunkMode unless CHUNK_MODE.value?(@options[:chunk_mode])
+
+ @options[:chunk_mode]
+ end
+ end
+
+ def chunk_row(data)
+ ::CSV.new(data.read, col_sep: separator).map do |row|
+ row
+ .compact
+ .map(&:strip)
+ .join(separator)
+ end.join("\n\n")
+ end
+
+ def chunk_file(data)
+ data.read
+ end
  end
  end
  end
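The two chunk modes behave like this. A sketch: it assumes the options hash backing `@options` is passed through the processor's constructor, and the CSV contents are illustrative:

```ruby
file = File.open("people.csv") # e.g. "name,city\nAda,London\nYukihiro,Matsue"

# Default "row" mode: cells of each row are stripped, compacted and re-joined,
# and rows are separated by blank lines.
Langchain::Processors::CSV.new.parse(file)
# => "name,city\n\nAda,London\n\nYukihiro,Matsue"

# "file" mode returns the raw file contents as a single chunk.
file.rewind
Langchain::Processors::CSV.new(chunk_mode: "file").parse(file)
```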
data/lib/langchain/processors/pptx.rb ADDED
@@ -0,0 +1,29 @@
+ # frozen_string_literal: true
+
+ module Langchain
+ module Processors
+ class Pptx < Base
+ EXTENSIONS = [".pptx"]
+ CONTENT_TYPES = ["application/vnd.openxmlformats-officedocument.presentationml.presentation"]
+
+ def initialize(*)
+ depends_on "power_point_pptx"
+ end
+
+ # Parse the document and return the text
+ # @param [File] data
+ # @return [String]
+ def parse(data)
+ presentation = PowerPointPptx::Document.open(data)
+
+ slides = presentation.slides
+ contents = slides.map(&:content)
+ text = contents.map do |sections|
+ sections.map(&:strip).join(" ")
+ end
+
+ text.join("\n\n")
+ end
+ end
+ end
+ end
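With the processor registered, .pptx ingestion works like the other formats in the README example above. A sketch; the path and the `client` vectorsearch instance are illustrative:

```ruby
my_pptx = Langchain.root.join("path/to/slides.pptx")

# Through a vectorsearch client, exactly like the README's add_data example
client.add_data(paths: [my_pptx])

# Or standalone: each slide's sections are joined with spaces,
# and slides are separated by blank lines.
Langchain::Processors::Pptx.new.parse(File.open(my_pptx))
```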
data/lib/langchain/vectorsearch/hnswlib.rb CHANGED
@@ -86,6 +86,10 @@ module Langchain::Vectorsearch
  client.search_knn(embedding, k)
  end
 
+ # TODO: Add the ask() method
+ # def ask
+ # end
+
  private
 
  #
data/lib/langchain/vectorsearch/pgvector.rb CHANGED
@@ -16,7 +16,8 @@ module Langchain::Vectorsearch
  # The operators supported by the PostgreSQL vector search adapter
  OPERATORS = {
  "cosine_distance" => "cosine",
- "euclidean_distance" => "euclidean"
+ "euclidean_distance" => "euclidean",
+ "inner_product_distance" => "inner_product"
  }
  DEFAULT_OPERATOR = "cosine_distance"
 
data/lib/langchain/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true
 
  module Langchain
- VERSION = "0.11.1"
+ VERSION = "0.11.3"
  end
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: langchainrb
  version: !ruby/object:Gem::Version
- version: 0.11.1
+ version: 0.11.3
  platform: ruby
  authors:
  - Andrei Bondarev
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2024-04-08 00:00:00.000000000 Z
+ date: 2024-04-17 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: activesupport
@@ -238,16 +238,16 @@ dependencies:
  name: anthropic
  requirement: !ruby/object:Gem::Requirement
  requirements:
- - - "~>"
+ - - ">="
  - !ruby/object:Gem::Version
- version: 0.1.0
+ version: '0'
  type: :development
  prerelease: false
  version_requirements: !ruby/object:Gem::Requirement
  requirements:
- - - "~>"
+ - - ">="
  - !ruby/object:Gem::Version
- version: 0.1.0
+ version: '0'
  - !ruby/object:Gem::Dependency
  name: aws-sdk-bedrockruntime
  requirement: !ruby/object:Gem::Requirement
@@ -682,6 +682,20 @@ dependencies:
  - - ">="
  - !ruby/object:Gem::Version
  version: '0'
+ - !ruby/object:Gem::Dependency
+ name: power_point_pptx
+ requirement: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: 0.1.0
+ type: :development
+ prerelease: false
+ version_requirements: !ruby/object:Gem::Requirement
+ requirements:
+ - - "~>"
+ - !ruby/object:Gem::Version
+ version: 0.1.0
  description: Build LLM-backed Ruby applications with Ruby's Langchain.rb
  email:
  - andrei.bondarev13@gmail.com
@@ -758,6 +772,7 @@ files:
  - lib/langchain/processors/jsonl.rb
  - lib/langchain/processors/markdown.rb
  - lib/langchain/processors/pdf.rb
+ - lib/langchain/processors/pptx.rb
  - lib/langchain/processors/text.rb
  - lib/langchain/processors/xlsx.rb
  - lib/langchain/prompt.rb