langchainrb 0.11.1 → 0.11.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -1
- data/README.md +2 -2
- data/lib/langchain/assistants/assistant.rb +25 -0
- data/lib/langchain/llm/anthropic.rb +86 -11
- data/lib/langchain/llm/openai.rb +3 -3
- data/lib/langchain/llm/response/anthropic_response.rb +17 -1
- data/lib/langchain/processors/csv.rb +37 -3
- data/lib/langchain/processors/pptx.rb +29 -0
- data/lib/langchain/vectorsearch/hnswlib.rb +4 -0
- data/lib/langchain/vectorsearch/pgvector.rb +2 -1
- data/lib/langchain/version.rb +1 -1
- metadata +21 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c678ae75bc25b0501223f5b6ffd396a9159af4d0ddd87ddb1657429ed2ba24ce
+  data.tar.gz: df50ef0a6d9c1a3100153a06084556cac983069d1a38739bd6606f39f63bd332
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3ec9f92f4c6221184b7a0a2c118caa6a56e7bc8505a83d5b5acb4daeb769ff90d8822b43c28f57adc06435dd2df5577268721345c4061d3dad6ecb919be18efc
+  data.tar.gz: 53d54b0c6a82082438f2e2f1ca70d097a9b916bc283b72e52ce466b6f012c9624cf094586b17e93eaa49a796bf9911051d3f4b494b9ecc93c3ac6ee6cdc7e8fe
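To reproduce these digests locally, a minimal Ruby sketch; it assumes the gem file has already been fetched, e.g. with `gem fetch langchainrb --version 0.11.3`, and the filename below is that command's output:

```ruby
require "digest"
require "rubygems/package"

# A .gem file is a tar archive containing metadata.gz, data.tar.gz and
# checksums.yaml.gz; hash the first two and compare with the values above.
File.open("langchainrb-0.11.3.gem", "rb") do |file|
  Gem::Package::TarReader.new(file).each do |entry|
    next unless ["metadata.gz", "data.tar.gz"].include?(entry.full_name)

    puts "#{entry.full_name}: #{Digest::SHA256.hexdigest(entry.read)}"
  end
end
```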
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,17 @@
 ## [Unreleased]
+- New `Langchain::Processors::Pptx` to parse .pptx files
+- New `Langchain::LLM::Anthropic#chat()` support
+- Misc fixes
 
-## [0.11.1]
+## [0.11.3]
+- New `Langchain::Processors::Pptx` to parse .pptx files
+- New `Langchain::LLM::Anthropic#chat()` support
+- Misc fixes
+
+## [0.11.2]
+- New `Langchain::Assistant#clear_thread!` and `Langchain::Assistant#instructions=` methods
+
+## [0.11.1]
 - Langchain::Tool::Vectorsearch that wraps Langchain::Vectorsearch::* classes. This allows the Assistant to call the tool and inject data from vector DBs.
 
 ## [0.11.0]
data/README.md
CHANGED
@@ -59,7 +59,7 @@ Langchain.rb wraps supported LLMs in a unified interface allowing you to easily
 | -------- |:------------------:| :-------: | :-----------------: | :-------: | :----------------- |
 | [OpenAI](https://openai.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ❌ | Including Azure OpenAI |
 | [AI21](https://ai21.com/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ | ❌ | ✅ | |
-| [Anthropic](https://anthropic.com/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ |
+| [Anthropic](https://anthropic.com/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ | ✅ | ❌ | |
 | [AWS Bedrock](https://aws.amazon.com/bedrock?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ❌ | ❌ | Provides AWS, Cohere, AI21, Antropic and Stability AI models |
 | [Cohere](https://cohere.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |
 | [GooglePalm](https://ai.google/discover/palm2?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |
@@ -372,7 +372,7 @@ my_docx = Langchain.root.join("path/to/my.docx")
 
 client.add_data(paths: [my_pdf, my_text, my_docx])
 ```
-Supported file formats: docx, html, pdf, text, json, jsonl, csv, xlsx, eml.
+Supported file formats: docx, html, pdf, text, json, jsonl, csv, xlsx, eml, pptx.
 
 Retrieve similar documents based on the query string passed in:
 ```ruby
data/lib/langchain/assistants/assistant.rb
CHANGED
@@ -125,12 +125,37 @@ module Langchain
       add_message(role: "tool", content: output, tool_call_id: tool_call_id)
     end
 
+    # Delete all messages in the thread
+    #
+    # @return [Array] Empty messages array
+    def clear_thread!
+      # TODO: Is this a bug? Should we keep the "system" message?
+      thread.messages = []
+    end
+
+    # Set new instructions
+    #
+    # @param new_instructions [String] New instructions that will be set as a system message
+    # @return [Array<Langchain::Message>] The messages in the thread
+    def instructions=(new_instructions)
+      @instructions = new_instructions
+
+      # Find message with role: "system" in thread.messages and delete it from the thread.messages array
+      thread.messages.delete_if(&:system?)
+
+      # Set new instructions by adding new system message
+      message = build_message(role: "system", content: new_instructions)
+      thread.messages.unshift(message)
+    end
+
     private
 
     # Call to the LLM#chat() method
     #
     # @return [Langchain::LLM::BaseResponse] The LLM response object
     def chat_with_llm
+      Langchain.logger.info("Sending a call to #{llm.class}", for: self.class)
+
       params = {messages: thread.openai_messages}
 
       if tools.any?
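A rough usage sketch of the two new Assistant methods; the LLM, thread and instruction strings are placeholders, not taken from the diff:

```ruby
llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])

assistant = Langchain::Assistant.new(
  llm: llm,
  thread: Langchain::Thread.new,
  instructions: "You are a helpful assistant"
)

# Swaps the system message at the head of the thread for a new one
assistant.instructions = "You are a terse assistant"

# Empties the thread entirely; note the TODO above about whether
# the system message should survive this call
assistant.clear_thread!
```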
data/lib/langchain/llm/anthropic.rb
CHANGED
@@ -14,12 +14,19 @@ module Langchain::LLM
   DEFAULTS = {
     temperature: 0.0,
     completion_model_name: "claude-2",
+    chat_completion_model_name: "claude-3-sonnet-20240229",
     max_tokens_to_sample: 256
   }.freeze
 
   # TODO: Implement token length validator for Anthropic
   # LENGTH_VALIDATOR = Langchain::Utils::TokenLength::AnthropicValidator
 
+  # Initialize an Anthropic LLM instance
+  #
+  # @param api_key [String] The API key to use
+  # @param llm_options [Hash] Options to pass to the Anthropic client
+  # @param default_options [Hash] Default options to use on every call to LLM, e.g.: { temperature:, completion_model_name:, chat_completion_model_name:, max_tokens_to_sample: }
+  # @return [Langchain::LLM::Anthropic] Langchain::LLM::Anthropic instance
   def initialize(api_key:, llm_options: {}, default_options: {})
     depends_on "anthropic"
 
@@ -27,17 +34,43 @@ module Langchain::LLM
     @defaults = DEFAULTS.merge(default_options)
   end
 
-  #
   # Generate a completion for a given prompt
   #
-  # @param prompt [String]
-  # @param
+  # @param prompt [String] Prompt to generate a completion for
+  # @param model [String] The model to use
+  # @param max_tokens_to_sample [Integer] The maximum number of tokens to sample
+  # @param stop_sequences [Array<String>] The stop sequences to use
+  # @param temperature [Float] The temperature to use
+  # @param top_p [Float] The top p value to use
+  # @param top_k [Integer] The top k value to use
+  # @param metadata [Hash] The metadata to use
+  # @param stream [Boolean] Whether to stream the response
   # @return [Langchain::LLM::AnthropicResponse] The completion
-
-
-
+  def complete(
+    prompt:,
+    model: @defaults[:completion_model_name],
+    max_tokens_to_sample: @defaults[:max_tokens_to_sample],
+    stop_sequences: nil,
+    temperature: @defaults[:temperature],
+    top_p: nil,
+    top_k: nil,
+    metadata: nil,
+    stream: nil
+  )
+    raise ArgumentError.new("model argument is required") if model.empty?
+    raise ArgumentError.new("max_tokens_to_sample argument is required") if max_tokens_to_sample.nil?
 
-    parameters
+    parameters = {
+      model: model,
+      prompt: prompt,
+      max_tokens_to_sample: max_tokens_to_sample,
+      temperature: temperature
+    }
+    parameters[:stop_sequences] = stop_sequences if stop_sequences
+    parameters[:top_p] = top_p if top_p
+    parameters[:top_k] = top_k if top_k
+    parameters[:metadata] = metadata if metadata
+    parameters[:stream] = stream if stream
 
     # TODO: Implement token length validator for Anthropic
     # parameters[:max_tokens_to_sample] = validate_max_tokens(prompt, parameters[:completion_model_name])
@@ -46,12 +79,54 @@ module Langchain::LLM
     Langchain::LLM::AnthropicResponse.new(response)
   end
 
-
+  # Generate a chat completion for given messages
+  #
+  # @param messages [Array<String>] Input messages
+  # @param model [String] The model that will complete your prompt
+  # @param max_tokens [Integer] Maximum number of tokens to generate before stopping
+  # @param metadata [Hash] Object describing metadata about the request
+  # @param stop_sequences [Array<String>] Custom text sequences that will cause the model to stop generating
+  # @param stream [Boolean] Whether to incrementally stream the response using server-sent events
+  # @param system [String] System prompt
+  # @param temperature [Float] Amount of randomness injected into the response
+  # @param tools [Array<String>] Definitions of tools that the model may use
+  # @param top_k [Integer] Only sample from the top K options for each subsequent token
+  # @param top_p [Float] Use nucleus sampling.
+  # @return [Langchain::LLM::AnthropicResponse] The chat completion
+  def chat(
+    messages: [],
+    model: @defaults[:chat_completion_model_name],
+    max_tokens: @defaults[:max_tokens_to_sample],
+    metadata: nil,
+    stop_sequences: nil,
+    stream: nil,
+    system: nil,
+    temperature: @defaults[:temperature],
+    tools: [],
+    top_k: nil,
+    top_p: nil
+  )
+    raise ArgumentError.new("messages argument is required") if messages.empty?
+    raise ArgumentError.new("model argument is required") if model.empty?
+    raise ArgumentError.new("max_tokens argument is required") if max_tokens.nil?
+
+    parameters = {
+      messages: messages,
+      model: model,
+      max_tokens: max_tokens,
+      temperature: temperature
+    }
+    parameters[:metadata] = metadata if metadata
+    parameters[:stop_sequences] = stop_sequences if stop_sequences
+    parameters[:stream] = stream if stream
+    parameters[:system] = system if system
+    parameters[:tools] = tools if tools.any?
+    parameters[:top_k] = top_k if top_k
+    parameters[:top_p] = top_p if top_p
 
-
-    default_params = {model: model}.merge(@defaults.except(:completion_model_name))
+    response = client.messages(parameters: parameters)
 
-
+    Langchain::LLM::AnthropicResponse.new(response)
   end
 
   # TODO: Implement token length validator for Anthropic
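Both entry points can then be exercised roughly as below; a sketch assuming the `anthropic` gem is installed and `ANTHROPIC_API_KEY` is set, with illustrative message content:

```ruby
llm = Langchain::LLM::Anthropic.new(api_key: ENV["ANTHROPIC_API_KEY"])

# New in 0.11.3: chat() drives Anthropic's Messages API and defaults to
# the new chat_completion_model_name ("claude-3-sonnet-20240229")
response = llm.chat(
  messages: [{role: "user", content: "Name three Ruby web frameworks"}],
  system: "Answer in a single line",
  max_tokens: 256
)
response.chat_completion # => the generated text

# complete() now builds its parameters hash the same conditional way
llm.complete(
  prompt: "\n\nHuman: Hello!\n\nAssistant:",
  stop_sequences: ["\n\nHuman:"]
).completion
```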
data/lib/langchain/llm/openai.rb
CHANGED
@@ -54,7 +54,7 @@ module Langchain::LLM
       model: defaults[:embeddings_model_name],
       encoding_format: nil,
       user: nil,
-      dimensions:
+      dimensions: @defaults[:dimensions]
     )
       raise ArgumentError.new("text argument is required") if text.empty?
       raise ArgumentError.new("model argument is required") if model.empty?
@@ -100,7 +100,7 @@ module Langchain::LLM
   end
   # rubocop:enable Style/ArgumentsForwarding
 
-  # Generate a chat completion for
+  # Generate a chat completion for given messages.
   #
   # @param messages [Array<Hash>] List of messages comprising the conversation so far
   # @param model [String] ID of the model to use
@@ -185,7 +185,7 @@ module Langchain::LLM
   end
 
   def default_dimension
-    @defaults[:
+    @defaults[:dimensions] || EMBEDDING_SIZES.fetch(defaults[:embeddings_model_name])
   end
 
   private
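The practical effect of the `dimensions` change, sketched below: the option can now be set once in `default_options` instead of on every `embed` call, and `default_dimension` prefers that override to the per-model lookup. The model name and size here are illustrative:

```ruby
llm = Langchain::LLM::OpenAI.new(
  api_key: ENV["OPENAI_API_KEY"],
  default_options: {
    embeddings_model_name: "text-embedding-3-large",
    dimensions: 256
  }
)

# embed() now falls back to @defaults[:dimensions] when the keyword is omitted
llm.embed(text: "Ruby").embedding.size # => 256
```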
data/lib/langchain/llm/response/anthropic_response.rb
CHANGED
@@ -10,6 +10,10 @@ module Langchain::LLM
       completions.first
     end
 
+    def chat_completion
+      raw_response.dig("content", 0, "text")
+    end
+
     def completions
       [raw_response.dig("completion")]
     end
@@ -26,8 +30,20 @@ module Langchain::LLM
       raw_response.dig("log_id")
     end
 
+    def prompt_tokens
+      raw_response.dig("usage", "input_tokens").to_i
+    end
+
+    def completion_tokens
+      raw_response.dig("usage", "output_tokens").to_i
+    end
+
+    def total_tokens
+      prompt_tokens + completion_tokens
+    end
+
     def role
-      "
+      raw_response.dig("role")
     end
   end
 end
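Since the new readers only dig into the Messages API payload, they can be checked against a canned hash; the values here are invented for illustration:

```ruby
raw = {
  "role" => "assistant",
  "content" => [{"type" => "text", "text" => "Hello!"}],
  "usage" => {"input_tokens" => 10, "output_tokens" => 25}
}

response = Langchain::LLM::AnthropicResponse.new(raw)
response.chat_completion   # => "Hello!"
response.role              # => "assistant"
response.prompt_tokens     # => 10
response.completion_tokens # => 25
response.total_tokens      # => 35
```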
data/lib/langchain/processors/csv.rb
CHANGED
@@ -5,15 +5,26 @@ require "csv"
 module Langchain
   module Processors
     class CSV < Base
+      class InvalidChunkMode < StandardError; end
+
       EXTENSIONS = [".csv"]
       CONTENT_TYPES = ["text/csv"]
+      CHUNK_MODE = {
+        row: "row",
+        file: "file"
+      }
 
       # Parse the document and return the text
       # @param [File] data
-      # @return [
+      # @return [String]
       def parse(data)
-
-
+        case chunk_mode
+        when CHUNK_MODE[:row]
+          chunk_row(data)
+        when CHUNK_MODE[:file]
+          chunk_file(data)
+        else
+          raise InvalidChunkMode
         end
       end
 
@@ -22,6 +33,29 @@ module Langchain
       def separator
         @options[:col_sep] || ","
       end
+
+      def chunk_mode
+        if @options[:chunk_mode].to_s.empty?
+          CHUNK_MODE[:row]
+        else
+          raise InvalidChunkMode unless CHUNK_MODE.value?(@options[:chunk_mode])
+
+          @options[:chunk_mode]
+        end
+      end
+
+      def chunk_row(data)
+        ::CSV.new(data.read, col_sep: separator).map do |row|
+          row
+            .compact
+            .map(&:strip)
+            .join(separator)
+        end.join("\n\n")
+      end
+
+      def chunk_file(data)
+        data.read
+      end
    end
  end
end
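How the two chunk modes differ, sketched with an in-memory file. Passing `chunk_mode:` through `new` assumes the base processor stores its options hash in `@options`, as the existing `separator` reader already implies:

```ruby
require "stringio"

data = StringIO.new("a, b\nc, d\n")

# Default ("row") mode: each row becomes one chunk (cells stripped),
# and rows are joined by blank lines
Langchain::Processors::CSV.new.parse(data)
# => "a,b\n\nc,d"

data.rewind
# "file" mode: the raw file content as a single chunk
Langchain::Processors::CSV.new(chunk_mode: "file").parse(data)
# => "a, b\nc, d\n"

# Any other value raises Langchain::Processors::CSV::InvalidChunkMode
```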
data/lib/langchain/processors/pptx.rb
ADDED
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+module Langchain
+  module Processors
+    class Pptx < Base
+      EXTENSIONS = [".pptx"]
+      CONTENT_TYPES = ["application/vnd.openxmlformats-officedocument.presentationml.presentation"]
+
+      def initialize(*)
+        depends_on "power_point_pptx"
+      end
+
+      # Parse the document and return the text
+      # @param [File] data
+      # @return [String]
+      def parse(data)
+        presentation = PowerPointPptx::Document.open(data)
+
+        slides = presentation.slides
+        contents = slides.map(&:content)
+        text = contents.map do |sections|
+          sections.map(&:strip).join(" ")
+        end
+
+        text.join("\n\n")
+      end
+    end
+  end
+end
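A short sketch of the new processor in use; the deck path is a placeholder, and the `power_point_pptx` gem (declared as a dependency in the metadata below) must be available at runtime:

```ruby
# Sections within a slide are stripped and joined with spaces;
# slides are separated by blank lines
text = Langchain::Processors::Pptx.new.parse(File.open("deck.pptx"))
```

Because `EXTENSIONS` and `CONTENT_TYPES` are declared, `Langchain::Loader` can route `.pptx` files here, which is what the README's expanded format list relies on.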
data/lib/langchain/vectorsearch/pgvector.rb
CHANGED
@@ -16,7 +16,8 @@ module Langchain::Vectorsearch
   # The operators supported by the PostgreSQL vector search adapter
   OPERATORS = {
     "cosine_distance" => "cosine",
-    "euclidean_distance" => "euclidean"
+    "euclidean_distance" => "euclidean",
+    "inner_product_distance" => "inner_product"
   }
   DEFAULT_OPERATOR = "cosine_distance"
 
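No new public option appears in this hunk; the change only extends the map from langchainrb's operator names to the names the underlying pgvector gem expects:

```ruby
Langchain::Vectorsearch::Pgvector::OPERATORS["inner_product_distance"]
# => "inner_product"
```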
data/lib/langchain/version.rb
CHANGED
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Langchain
-  VERSION = "0.11.1"
+  VERSION = "0.11.3"
 end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.11.1
+  version: 0.11.3
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-04-
+date: 2024-04-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activesupport
@@ -238,16 +238,16 @@ dependencies:
   name: anthropic
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version: 0
+        version: '0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version: 0
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: aws-sdk-bedrockruntime
   requirement: !ruby/object:Gem::Requirement
@@ -682,6 +682,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: power_point_pptx
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.1.0
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.1.0
 description: Build LLM-backed Ruby applications with Ruby's Langchain.rb
 email:
 - andrei.bondarev13@gmail.com
@@ -758,6 +772,7 @@ files:
 - lib/langchain/processors/jsonl.rb
 - lib/langchain/processors/markdown.rb
 - lib/langchain/processors/pdf.rb
+- lib/langchain/processors/pptx.rb
 - lib/langchain/processors/text.rb
 - lib/langchain/processors/xlsx.rb
 - lib/langchain/prompt.rb