langchainrb 0.11.2 → 0.11.3
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +2 -2
- data/lib/langchain/assistants/assistant.rb +1 -0
- data/lib/langchain/llm/anthropic.rb +86 -11
- data/lib/langchain/llm/openai.rb +1 -1
- data/lib/langchain/llm/response/anthropic_response.rb +17 -1
- data/lib/langchain/processors/csv.rb +37 -3
- data/lib/langchain/processors/pptx.rb +29 -0
- data/lib/langchain/vectorsearch/pgvector.rb +2 -1
- data/lib/langchain/version.rb +1 -1
- metadata +21 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c678ae75bc25b0501223f5b6ffd396a9159af4d0ddd87ddb1657429ed2ba24ce
+  data.tar.gz: df50ef0a6d9c1a3100153a06084556cac983069d1a38739bd6606f39f63bd332
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 3ec9f92f4c6221184b7a0a2c118caa6a56e7bc8505a83d5b5acb4daeb769ff90d8822b43c28f57adc06435dd2df5577268721345c4061d3dad6ecb919be18efc
+  data.tar.gz: 53d54b0c6a82082438f2e2f1ca70d097a9b916bc283b72e52ce466b6f012c9624cf094586b17e93eaa49a796bf9911051d3f4b494b9ecc93c3ac6ee6cdc7e8fe
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,12 @@
 ## [Unreleased]
+- New `Langchain::Processors::Pptx` to parse .pptx files
+- New `Langchain::LLM::Anthropic#chat()` support
+- Misc fixes
+
+## [0.11.3]
+- New `Langchain::Processors::Pptx` to parse .pptx files
+- New `Langchain::LLM::Anthropic#chat()` support
+- Misc fixes
 
 ## [0.11.2]
 - New `Langchain::Assistant#clear_thread!` and `Langchain::Assistant#instructions=` methods
data/README.md
CHANGED
@@ -59,7 +59,7 @@ Langchain.rb wraps supported LLMs in a unified interface allowing you to easily
 | -------- |:------------------:| :-------: | :-----------------: | :-------: | :----------------- |
 | [OpenAI](https://openai.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ❌ | Including Azure OpenAI |
 | [AI21](https://ai21.com/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ | ❌ | ✅ | |
-| [Anthropic](https://anthropic.com/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ |
+| [Anthropic](https://anthropic.com/?utm_source=langchainrb&utm_medium=github) | ❌ | ✅ | ✅ | ❌ | |
 | [AWS Bedrock](https://aws.amazon.com/bedrock?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ❌ | ❌ | Provides AWS, Cohere, AI21, Antropic and Stability AI models |
 | [Cohere](https://cohere.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |
 | [GooglePalm](https://ai.google/discover/palm2?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |
@@ -372,7 +372,7 @@ my_docx = Langchain.root.join("path/to/my.docx")
 
 client.add_data(paths: [my_pdf, my_text, my_docx])
 ```
-Supported file formats: docx, html, pdf, text, json, jsonl, csv, xlsx, eml.
+Supported file formats: docx, html, pdf, text, json, jsonl, csv, xlsx, eml, pptx.
 
 Retrieve similar documents based on the query string passed in:
 ```ruby
data/lib/langchain/llm/anthropic.rb
CHANGED
@@ -14,12 +14,19 @@ module Langchain::LLM
     DEFAULTS = {
       temperature: 0.0,
       completion_model_name: "claude-2",
+      chat_completion_model_name: "claude-3-sonnet-20240229",
       max_tokens_to_sample: 256
     }.freeze
 
     # TODO: Implement token length validator for Anthropic
     # LENGTH_VALIDATOR = Langchain::Utils::TokenLength::AnthropicValidator
 
+    # Initialize an Anthropic LLM instance
+    #
+    # @param api_key [String] The API key to use
+    # @param llm_options [Hash] Options to pass to the Anthropic client
+    # @param default_options [Hash] Default options to use on every call to LLM, e.g.: { temperature:, completion_model_name:, chat_completion_model_name:, max_tokens_to_sample: }
+    # @return [Langchain::LLM::Anthropic] Langchain::LLM::Anthropic instance
     def initialize(api_key:, llm_options: {}, default_options: {})
       depends_on "anthropic"
 
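For context, the documented constructor can be exercised like this (a minimal sketch, not part of the diff: the ENV variable name and the overridden defaults are illustrative, and the `anthropic` gem must be installed since `initialize` calls `depends_on "anthropic"`):

```ruby
require "langchain"

# Build the client; default_options keys follow the docstring above.
llm = Langchain::LLM::Anthropic.new(
  api_key: ENV["ANTHROPIC_API_KEY"], # illustrative key source
  default_options: {
    temperature: 0.5, # overrides the 0.0 default
    chat_completion_model_name: "claude-3-sonnet-20240229"
  }
)
```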
@@ -27,17 +34,43 @@ module Langchain::LLM
       @defaults = DEFAULTS.merge(default_options)
     end
 
-    #
     # Generate a completion for a given prompt
     #
-    # @param prompt [String]
-    # @param
+    # @param prompt [String] Prompt to generate a completion for
+    # @param model [String] The model to use
+    # @param max_tokens_to_sample [Integer] The maximum number of tokens to sample
+    # @param stop_sequences [Array<String>] The stop sequences to use
+    # @param temperature [Float] The temperature to use
+    # @param top_p [Float] The top p value to use
+    # @param top_k [Integer] The top k value to use
+    # @param metadata [Hash] The metadata to use
+    # @param stream [Boolean] Whether to stream the response
     # @return [Langchain::LLM::AnthropicResponse] The completion
-
-
-
+    def complete(
+      prompt:,
+      model: @defaults[:completion_model_name],
+      max_tokens_to_sample: @defaults[:max_tokens_to_sample],
+      stop_sequences: nil,
+      temperature: @defaults[:temperature],
+      top_p: nil,
+      top_k: nil,
+      metadata: nil,
+      stream: nil
+    )
+      raise ArgumentError.new("model argument is required") if model.empty?
+      raise ArgumentError.new("max_tokens_to_sample argument is required") if max_tokens_to_sample.nil?
 
-      parameters
+      parameters = {
+        model: model,
+        prompt: prompt,
+        max_tokens_to_sample: max_tokens_to_sample,
+        temperature: temperature
+      }
+      parameters[:stop_sequences] = stop_sequences if stop_sequences
+      parameters[:top_p] = top_p if top_p
+      parameters[:top_k] = top_k if top_k
+      parameters[:metadata] = metadata if metadata
+      parameters[:stream] = stream if stream
 
       # TODO: Implement token length validator for Anthropic
       # parameters[:max_tokens_to_sample] = validate_max_tokens(prompt, parameters[:completion_model_name])
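The rewritten `complete` signature can then be called as below (a sketch under the same assumptions: the prompt text is illustrative and uses Anthropic's Human/Assistant completion format, and `completion` on the response object wraps `completions.first`):

```ruby
response = llm.complete(
  prompt: "\n\nHuman: Name three Ruby web frameworks.\n\nAssistant:",
  max_tokens_to_sample: 256,
  stop_sequences: ["\n\nHuman:"] # optional; only sent when provided
)
response.completion # => the generated text as a String
response.class      # => Langchain::LLM::AnthropicResponse
```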
@@ -46,12 +79,54 @@ module Langchain::LLM
       Langchain::LLM::AnthropicResponse.new(response)
     end
 
-
+    # Generate a chat completion for given messages
+    #
+    # @param messages [Array<String>] Input messages
+    # @param model [String] The model that will complete your prompt
+    # @param max_tokens [Integer] Maximum number of tokens to generate before stopping
+    # @param metadata [Hash] Object describing metadata about the request
+    # @param stop_sequences [Array<String>] Custom text sequences that will cause the model to stop generating
+    # @param stream [Boolean] Whether to incrementally stream the response using server-sent events
+    # @param system [String] System prompt
+    # @param temperature [Float] Amount of randomness injected into the response
+    # @param tools [Array<String>] Definitions of tools that the model may use
+    # @param top_k [Integer] Only sample from the top K options for each subsequent token
+    # @param top_p [Float] Use nucleus sampling.
+    # @return [Langchain::LLM::AnthropicResponse] The chat completion
+    def chat(
+      messages: [],
+      model: @defaults[:chat_completion_model_name],
+      max_tokens: @defaults[:max_tokens_to_sample],
+      metadata: nil,
+      stop_sequences: nil,
+      stream: nil,
+      system: nil,
+      temperature: @defaults[:temperature],
+      tools: [],
+      top_k: nil,
+      top_p: nil
+    )
+      raise ArgumentError.new("messages argument is required") if messages.empty?
+      raise ArgumentError.new("model argument is required") if model.empty?
+      raise ArgumentError.new("max_tokens argument is required") if max_tokens.nil?
+
+      parameters = {
+        messages: messages,
+        model: model,
+        max_tokens: max_tokens,
+        temperature: temperature
+      }
+      parameters[:metadata] = metadata if metadata
+      parameters[:stop_sequences] = stop_sequences if stop_sequences
+      parameters[:stream] = stream if stream
+      parameters[:system] = system if system
+      parameters[:tools] = tools if tools.any?
+      parameters[:top_k] = top_k if top_k
+      parameters[:top_p] = top_p if top_p
 
-
-      default_params = {model: model}.merge(@defaults.except(:completion_model_name))
+      response = client.messages(parameters: parameters)
 
-
+      Langchain::LLM::AnthropicResponse.new(response)
     end
 
     # TODO: Implement token length validator for Anthropic
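A matching sketch for the new `chat` method (hedged: the docstring above types `messages` as `Array<String>`, but the hashes below follow Anthropic's Messages API shape, which `client.messages` forwards verbatim):

```ruby
response = llm.chat(
  messages: [{role: "user", content: "Summarize RFC 2119 in one sentence."}],
  system: "You are a concise technical writer.", # optional system prompt
  max_tokens: 256
)
response.chat_completion # => text of the first content block
```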
data/lib/langchain/llm/openai.rb
CHANGED
@@ -100,7 +100,7 @@ module Langchain::LLM
     end
     # rubocop:enable Style/ArgumentsForwarding
 
-    # Generate a chat completion for
+    # Generate a chat completion for given messages.
     #
     # @param messages [Array<Hash>] List of messages comprising the conversation so far
     # @param model [String] ID of the model to use
data/lib/langchain/llm/response/anthropic_response.rb
CHANGED
@@ -10,6 +10,10 @@ module Langchain::LLM
       completions.first
     end
 
+    def chat_completion
+      raw_response.dig("content", 0, "text")
+    end
+
     def completions
       [raw_response.dig("completion")]
     end
@@ -26,8 +30,20 @@ module Langchain::LLM
       raw_response.dig("log_id")
     end
 
+    def prompt_tokens
+      raw_response.dig("usage", "input_tokens").to_i
+    end
+
+    def completion_tokens
+      raw_response.dig("usage", "output_tokens").to_i
+    end
+
+    def total_tokens
+      prompt_tokens + completion_tokens
+    end
+
     def role
-      "
+      raw_response.dig("role")
     end
   end
 end
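Together with the new `chat` method, these readers give straightforward token accounting (a sketch; the field names mirror the `dig` calls in the diff):

```ruby
response = llm.chat(messages: [{role: "user", content: "Hi"}])
response.role              # => "assistant", from raw_response["role"]
response.prompt_tokens     # usage.input_tokens as an Integer
response.completion_tokens # usage.output_tokens as an Integer
response.total_tokens      # prompt_tokens + completion_tokens
```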
data/lib/langchain/processors/csv.rb
CHANGED
@@ -5,15 +5,26 @@ require "csv"
 module Langchain
   module Processors
     class CSV < Base
+      class InvalidChunkMode < StandardError; end
+
       EXTENSIONS = [".csv"]
       CONTENT_TYPES = ["text/csv"]
+      CHUNK_MODE = {
+        row: "row",
+        file: "file"
+      }
 
       # Parse the document and return the text
       # @param [File] data
-      # @return [
+      # @return [String]
       def parse(data)
-
-
+        case chunk_mode
+        when CHUNK_MODE[:row]
+          chunk_row(data)
+        when CHUNK_MODE[:file]
+          chunk_file(data)
+        else
+          raise InvalidChunkMode
         end
       end
 
@@ -22,6 +33,29 @@ module Langchain
       def separator
         @options[:col_sep] || ","
       end
+
+      def chunk_mode
+        if @options[:chunk_mode].to_s.empty?
+          CHUNK_MODE[:row]
+        else
+          raise InvalidChunkMode unless CHUNK_MODE.value?(@options[:chunk_mode])
+
+          @options[:chunk_mode]
+        end
+      end
+
+      def chunk_row(data)
+        ::CSV.new(data.read, col_sep: separator).map do |row|
+          row
+            .compact
+            .map(&:strip)
+            .join(separator)
+        end.join("\n\n")
+      end
+
+      def chunk_file(data)
+        data.read
+      end
     end
   end
 end
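A usage sketch (assuming processor options are passed as a hash to the constructor, as `col_sep` already is; the file name is illustrative). `chunk_mode` must be one of the `CHUNK_MODE` string values and defaults to "row":

```ruby
# Default: one chunk per row, cells stripped and re-joined with the separator
Langchain::Processors::CSV.new.parse(File.open("people.csv"))

# Whole file as a single chunk
Langchain::Processors::CSV.new(chunk_mode: "file").parse(File.open("people.csv"))

# An unknown mode fails fast
Langchain::Processors::CSV.new(chunk_mode: "rows").parse(File.open("people.csv"))
# => raises Langchain::Processors::CSV::InvalidChunkMode
```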
data/lib/langchain/processors/pptx.rb
ADDED
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+module Langchain
+  module Processors
+    class Pptx < Base
+      EXTENSIONS = [".pptx"]
+      CONTENT_TYPES = ["application/vnd.openxmlformats-officedocument.presentationml.presentation"]
+
+      def initialize(*)
+        depends_on "power_point_pptx"
+      end
+
+      # Parse the document and return the text
+      # @param [File] data
+      # @return [String]
+      def parse(data)
+        presentation = PowerPointPptx::Document.open(data)
+
+        slides = presentation.slides
+        contents = slides.map(&:content)
+        text = contents.map do |sections|
+          sections.map(&:strip).join(" ")
+        end
+
+        text.join("\n\n")
+      end
+    end
+  end
+end
data/lib/langchain/vectorsearch/pgvector.rb
CHANGED
@@ -16,7 +16,8 @@ module Langchain::Vectorsearch
     # The operators supported by the PostgreSQL vector search adapter
     OPERATORS = {
       "cosine_distance" => "cosine",
-      "euclidean_distance" => "euclidean"
+      "euclidean_distance" => "euclidean",
+      "inner_product_distance" => "inner_product"
     }
     DEFAULT_OPERATOR = "cosine_distance"
 
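The effect of the added entry, taken straight from the hash above:

```ruby
Langchain::Vectorsearch::Pgvector::OPERATORS["inner_product_distance"]
# => "inner_product", the operator name handed down to the pgvector gem
```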
data/lib/langchain/version.rb
CHANGED
-  VERSION = "0.11.2"
+  VERSION = "0.11.3"
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.11.2
+  version: 0.11.3
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-04-
+date: 2024-04-17 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: activesupport
@@ -238,16 +238,16 @@ dependencies:
   name: anthropic
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version: 0
+        version: '0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "
+    - - ">="
       - !ruby/object:Gem::Version
-        version: 0
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: aws-sdk-bedrockruntime
   requirement: !ruby/object:Gem::Requirement
@@ -682,6 +682,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: power_point_pptx
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.1.0
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.1.0
 description: Build LLM-backed Ruby applications with Ruby's Langchain.rb
 email:
 - andrei.bondarev13@gmail.com
@@ -758,6 +772,7 @@ files:
 - lib/langchain/processors/jsonl.rb
 - lib/langchain/processors/markdown.rb
 - lib/langchain/processors/pdf.rb
+- lib/langchain/processors/pptx.rb
 - lib/langchain/processors/text.rb
 - lib/langchain/processors/xlsx.rb
 - lib/langchain/prompt.rb