langchainrb 0.5.6 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +11 -8
- data/README.md +6 -6
- data/examples/create_and_manage_prompt_templates_using_structured_output_parser.rb +104 -0
- data/lib/langchain/agent/base.rb +1 -1
- data/lib/langchain/agent/{chain_of_thought_agent/chain_of_thought_agent.rb → react_agent/react_agent.rb} +5 -5
- data/lib/langchain/agent/sql_query_agent/sql_query_agent.rb +1 -1
- data/lib/langchain/data.rb +8 -0
- data/lib/langchain/llm/ai21.rb +21 -3
- data/lib/langchain/llm/cohere.rb +11 -6
- data/lib/langchain/llm/google_palm.rb +13 -15
- data/lib/langchain/llm/openai.rb +52 -12
- data/lib/langchain/llm/replicate.rb +5 -4
- data/lib/langchain/loader.rb +41 -9
- data/lib/langchain/output_parsers/base.rb +45 -0
- data/lib/langchain/output_parsers/structured.rb +91 -0
- data/lib/langchain/tool/base.rb +1 -1
- data/lib/langchain/tool/calculator.rb +6 -8
- data/lib/langchain/tool/database.rb +38 -7
- data/lib/langchain/utils/token_length/ai21_validator.rb +36 -0
- data/lib/langchain/utils/token_length/cohere_validator.rb +44 -0
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +10 -3
- metadata +29 -10
- /data/lib/langchain/agent/{chain_of_thought_agent/chain_of_thought_agent_prompt.yaml → react_agent/react_agent_prompt.yaml} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7fba7b5e03ae75aa4ee3c89dd0322a73bbb9d3ced79f48dda8861af1f4f197b8
|
4
|
+
data.tar.gz: 404e742b0911305beec3bd22575740fc78ed9005e21295e0f7c348c1bede3e7e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c8166375c28abe9bc3a7e02a2ceba2ce1bea8ff4f751beb2c69d6f34aa46ba2b7c6ca34458f79b6dfba6d8908cc5e9b055f4e4e7dc6c4c09ac65f7f589c12eb7
|
7
|
+
data.tar.gz: fb81a51867575c5fae10b79f0f3ee761b25b75d623f2c071d000b39f84c672d9f7164e3c8442a1f11d7abdff8edd41f7e79c2c3a7cb99fa9952406bc60506ce8
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.6.0] - 2023-06-22
|
4
|
+
- [BREAKING] Rename `ChainOfThoughtAgent` to `ReActAgent`
|
5
|
+
- Implement A21 token validator
|
6
|
+
- Add `Langchain::OutputParsers`
|
7
|
+
|
8
|
+
## [0.5.7] - 2023-06-19
|
9
|
+
- Developer can modify models used when initiliazing `Langchain::LLM::*` clients
|
10
|
+
- Improvements to the `SQLQueryAgent` and the database tool
|
11
|
+
|
3
12
|
## [0.5.6] - 2023-06-18
|
4
13
|
- If used with OpenAI, Langchain::Conversation responses can now be streamed.
|
5
14
|
- Improved logging
|
data/Gemfile.lock
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
langchainrb (0.
|
4
|
+
langchainrb (0.6.0)
|
5
5
|
baran (~> 0.1.6)
|
6
6
|
colorize (~> 0.8.1)
|
7
|
+
json-schema (~> 4.0.0)
|
7
8
|
tiktoken_ruby (~> 0.0.5)
|
8
9
|
|
9
10
|
GEM
|
@@ -31,7 +32,7 @@ GEM
|
|
31
32
|
addressable (2.8.4)
|
32
33
|
public_suffix (>= 2.0.2, < 6.0)
|
33
34
|
afm (0.2.2)
|
34
|
-
ai21 (0.2.
|
35
|
+
ai21 (0.2.1)
|
35
36
|
ast (2.4.2)
|
36
37
|
baran (0.1.6)
|
37
38
|
builder (3.2.4)
|
@@ -41,7 +42,7 @@ GEM
|
|
41
42
|
dry-monads (~> 1.6)
|
42
43
|
ruby-next-core (>= 0.15.0)
|
43
44
|
coderay (1.1.3)
|
44
|
-
cohere-ruby (0.9.
|
45
|
+
cohere-ruby (0.9.5)
|
45
46
|
faraday (>= 1.0.0)
|
46
47
|
faraday_middleware (>= 1.0.0)
|
47
48
|
colorize (0.8.1)
|
@@ -124,7 +125,7 @@ GEM
|
|
124
125
|
faraday-retry (1.0.3)
|
125
126
|
faraday_middleware (1.2.0)
|
126
127
|
faraday (~> 1.0)
|
127
|
-
google_palm_api (0.1.
|
128
|
+
google_palm_api (0.1.2)
|
128
129
|
faraday (>= 1.0.0)
|
129
130
|
faraday_middleware (>= 1.0.0)
|
130
131
|
google_search_results (2.0.1)
|
@@ -148,6 +149,8 @@ GEM
|
|
148
149
|
concurrent-ruby (~> 1.0)
|
149
150
|
ice_nine (0.11.2)
|
150
151
|
json (2.6.3)
|
152
|
+
json-schema (4.0.0)
|
153
|
+
addressable (>= 2.8)
|
151
154
|
language_server-protocol (3.17.0.3)
|
152
155
|
lint_roller (1.0.0)
|
153
156
|
loofah (2.21.1)
|
@@ -219,7 +222,7 @@ GEM
|
|
219
222
|
zeitwerk (~> 2.5)
|
220
223
|
rainbow (3.1.1)
|
221
224
|
rake (13.0.6)
|
222
|
-
rb_sys (0.9.
|
225
|
+
rb_sys (0.9.79)
|
223
226
|
rdiscount (2.2.7)
|
224
227
|
regexp_parser (2.8.0)
|
225
228
|
replicate-ruby (0.2.2)
|
@@ -313,13 +316,13 @@ PLATFORMS
|
|
313
316
|
x86_64-linux
|
314
317
|
|
315
318
|
DEPENDENCIES
|
316
|
-
ai21 (~> 0.2.
|
319
|
+
ai21 (~> 0.2.1)
|
317
320
|
chroma-db (~> 0.3.0)
|
318
|
-
cohere-ruby (~> 0.9.
|
321
|
+
cohere-ruby (~> 0.9.5)
|
319
322
|
docx (~> 0.8.0)
|
320
323
|
dotenv-rails (~> 2.7.6)
|
321
324
|
eqn (~> 1.6.5)
|
322
|
-
google_palm_api (~> 0.1.
|
325
|
+
google_palm_api (~> 0.1.2)
|
323
326
|
google_search_results (~> 2.0.0)
|
324
327
|
hnswlib (~> 0.8.1)
|
325
328
|
hugging-face (~> 0.3.4)
|
data/README.md
CHANGED
@@ -10,7 +10,7 @@
|
|
10
10
|
[![Gem Version](https://badge.fury.io/rb/langchainrb.svg)](https://badge.fury.io/rb/langchainrb)
|
11
11
|
[![Docs](http://img.shields.io/badge/yard-docs-blue.svg)](http://rubydoc.info/gems/langchainrb)
|
12
12
|
[![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/andreibondarev/langchainrb/blob/main/LICENSE.txt)
|
13
|
-
[![](https://dcbadge.vercel.app/api/server/
|
13
|
+
[![](https://dcbadge.vercel.app/api/server/WDARp7J2n8?compact=true&style=flat)](https://discord.gg/WDARp7J2n8)
|
14
14
|
|
15
15
|
|
16
16
|
Langchain.rb is a library that's an abstraction layer on top many emergent AI, ML and other DS tools. The goal is to abstract complexity and difficult concepts to make building AI/ML-supercharged applications approachable for traditional software engineers.
|
@@ -155,13 +155,13 @@ replicate = Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
|
|
155
155
|
```
|
156
156
|
|
157
157
|
#### Google PaLM (Pathways Language Model)
|
158
|
-
Add `"google_palm_api", "~> 0.1.
|
158
|
+
Add `"google_palm_api", "~> 0.1.2"` to your Gemfile.
|
159
159
|
```ruby
|
160
160
|
google_palm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
|
161
161
|
```
|
162
162
|
|
163
163
|
#### AI21
|
164
|
-
Add `gem "ai21", "~> 0.2.
|
164
|
+
Add `gem "ai21", "~> 0.2.1"` to your Gemfile.
|
165
165
|
```ruby
|
166
166
|
ai21 = Langchain::LLM::AI21.new(api_key: ENV["AI21_API_KEY"])
|
167
167
|
```
|
@@ -261,7 +261,7 @@ prompt.input_variables #=> ["adjective", "content"]
|
|
261
261
|
### Using Agents 🤖
|
262
262
|
Agents are semi-autonomous bots that can respond to user questions and use available to them Tools to provide informed replies. They break down problems into series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer it responds with it.
|
263
263
|
|
264
|
-
####
|
264
|
+
#### ReAct Agent
|
265
265
|
|
266
266
|
Add `gem "ruby-openai"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
|
267
267
|
|
@@ -271,7 +271,7 @@ calculator = Langchain::Tool::Calculator.new
|
|
271
271
|
|
272
272
|
openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
273
273
|
|
274
|
-
agent = Langchain::Agent::
|
274
|
+
agent = Langchain::Agent::ReActAgent.new(
|
275
275
|
llm: openai,
|
276
276
|
tools: [search_tool, calculator]
|
277
277
|
)
|
@@ -367,7 +367,7 @@ Langchain.logger.level = :info
|
|
367
367
|
5. Optionally, install lefthook git hooks for pre-commit to auto lint: `gem install lefthook && lefthook install -f`
|
368
368
|
|
369
369
|
## Discord
|
370
|
-
Join us in the [Langchain.rb](https://discord.gg/
|
370
|
+
Join us in the [Langchain.rb](https://discord.gg/WDARp7J2n8) Discord server.
|
371
371
|
|
372
372
|
## Core Contributors
|
373
373
|
[<img style="border-radius:50%" alt="Andrei Bondarev" src="https://avatars.githubusercontent.com/u/541665?v=4" width="80" height="80" class="avatar">](https://github.com/andreibondarev)
|
@@ -0,0 +1,104 @@
|
|
1
|
+
require "langchain"
|
2
|
+
|
3
|
+
# Generate a prompt that directs the LLM to provide a JSON response that adheres to a specific JSON schema.
|
4
|
+
json_schema = {
|
5
|
+
type: "object",
|
6
|
+
properties: {
|
7
|
+
name: {
|
8
|
+
type: "string",
|
9
|
+
description: "Persons name"
|
10
|
+
},
|
11
|
+
age: {
|
12
|
+
type: "number",
|
13
|
+
description: "Persons age"
|
14
|
+
},
|
15
|
+
interests: {
|
16
|
+
type: "array",
|
17
|
+
items: {
|
18
|
+
type: "object",
|
19
|
+
properties: {
|
20
|
+
interest: {
|
21
|
+
type: "string",
|
22
|
+
description: "A topic of interest"
|
23
|
+
},
|
24
|
+
levelOfInterest: {
|
25
|
+
type: "number",
|
26
|
+
description: "A value between 0 and 100 of how interested the person is in this interest"
|
27
|
+
}
|
28
|
+
},
|
29
|
+
required: ["interest", "levelOfInterest"],
|
30
|
+
additionalProperties: false
|
31
|
+
},
|
32
|
+
minItems: 1,
|
33
|
+
maxItems: 3,
|
34
|
+
description: "A list of the person's interests"
|
35
|
+
}
|
36
|
+
},
|
37
|
+
required: ["name", "age", "interests"],
|
38
|
+
additionalProperties: false
|
39
|
+
}
|
40
|
+
parser = Langchain::OutputParsers::StructuredOutputParser.from_json_schema(json_schema)
|
41
|
+
prompt = Langchain::Prompt::PromptTemplate.new(template: "Generate details of a fictional character.\n{format_instructions}\nCharacter description: {description}", input_variables: ["description", "format_instructions"])
|
42
|
+
prompt.format(description: "Korean chemistry student", format_instructions: parser.get_format_instructions)
|
43
|
+
# Generate details of a fictional character.
|
44
|
+
# You must format your output as a JSON value that adheres to a given "JSON Schema" instance.
|
45
|
+
|
46
|
+
# "JSON Schema" is a declarative language that allows you to annotate and validate JSON documents.
|
47
|
+
|
48
|
+
# For example, the example "JSON Schema" instance {"properties": {"foo": {"description": "a list of test words", "type": "array", "items": {"type": "string"}}, "required": ["foo"]}
|
49
|
+
# would match an object with one required property, "foo". The "type" property specifies "foo" must be an "array", and the "description" property semantically describes it as "a list of test words". The items within "foo" must be strings.
|
50
|
+
# Thus, the object {"foo": ["bar", "baz"]} is a well-formatted instance of this example "JSON Schema". The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.
|
51
|
+
|
52
|
+
# Your output will be parsed and type-checked according to the provided schema instance, so make sure all fields in your output match the schema exactly and there are no trailing commas!
|
53
|
+
|
54
|
+
# Here is the JSON Schema instance your output must adhere to. Include the enclosing markdown codeblock:
|
55
|
+
# ```json
|
56
|
+
# {"type":"object","properties":{"name":{"type":"string","description":"Persons name"},"age":{"type":"number","description":"Persons age"},"interests":{"type":"array","items":{"type":"object","properties":{"interest":{"type":"string","description":"A topic of interest"},"levelOfInterest":{"type":"number","description":"A value between 0 and 100 of how interested the person is in this interest"},"required":["interest","levelOfInterest"],"additionalProperties":false},"minItems":1,"maxItems":3,"description":"A list of the person's interests"},"required":["name","age","interests"],"additionalProperties":false}
|
57
|
+
# ```
|
58
|
+
|
59
|
+
# Character description: Korean chemistry student
|
60
|
+
|
61
|
+
# LLM example response:
|
62
|
+
llm_example_response = <<~RESPONSE
|
63
|
+
Here is your character:
|
64
|
+
```json
|
65
|
+
{
|
66
|
+
"name": "Kim Ji-hyun",
|
67
|
+
"age": 22,
|
68
|
+
"interests": [
|
69
|
+
{
|
70
|
+
"interest": "Organic Chemistry",
|
71
|
+
"levelOfInterest": 85
|
72
|
+
},
|
73
|
+
{
|
74
|
+
"interest": "Biochemistry",
|
75
|
+
"levelOfInterest": 70
|
76
|
+
},
|
77
|
+
{
|
78
|
+
"interest": "Analytical Chemistry",
|
79
|
+
"levelOfInterest": 60
|
80
|
+
}
|
81
|
+
]
|
82
|
+
}
|
83
|
+
```
|
84
|
+
RESPONSE
|
85
|
+
|
86
|
+
parser.parse(llm_example_response)
|
87
|
+
# {
|
88
|
+
# "name" => "Kim Ji-hyun",
|
89
|
+
# "age" => 22,
|
90
|
+
# "interests" => [
|
91
|
+
# {
|
92
|
+
# "interest" => "Organic Chemistry",
|
93
|
+
# "levelOfInterest" => 85
|
94
|
+
# },
|
95
|
+
# {
|
96
|
+
# "interest" => "Biochemistry",
|
97
|
+
# "levelOfInterest" => 70
|
98
|
+
# },
|
99
|
+
# {
|
100
|
+
# "interest" => "Analytical Chemistry",
|
101
|
+
# "levelOfInterest" => 60
|
102
|
+
# }
|
103
|
+
# ]
|
104
|
+
# }
|
data/lib/langchain/agent/base.rb
CHANGED
@@ -6,7 +6,7 @@ module Langchain::Agent
|
|
6
6
|
# Agents are semi-autonomous bots that can respond to user questions and use available to them Tools to provide informed replies. They break down problems into series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer it responds with it.
|
7
7
|
#
|
8
8
|
# Available:
|
9
|
-
# - {Langchain::Agent::
|
9
|
+
# - {Langchain::Agent::ReActAgent}
|
10
10
|
#
|
11
11
|
# @abstract
|
12
12
|
class Base
|
@@ -1,11 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Langchain::Agent
|
4
|
-
# =
|
4
|
+
# = ReAct Agent
|
5
5
|
#
|
6
6
|
# llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]) # or your choice of Langchain::LLM::Base implementation
|
7
7
|
#
|
8
|
-
# agent = Langchain::Agent::
|
8
|
+
# agent = Langchain::Agent::ReActAgent.new(
|
9
9
|
# llm: llm,
|
10
10
|
# tools: ["google_search", "calculator", "wikipedia"]
|
11
11
|
# )
|
@@ -15,7 +15,7 @@ module Langchain::Agent
|
|
15
15
|
#
|
16
16
|
# agent.run(question: "How many full soccer fields would be needed to cover the distance between NYC and DC in a straight line?")
|
17
17
|
# #=> "Approximately 2,945 soccer fields would be needed to cover the distance between NYC and DC in a straight line."
|
18
|
-
class
|
18
|
+
class ReActAgent < Base
|
19
19
|
attr_reader :llm, :tools, :max_iterations
|
20
20
|
|
21
21
|
# Initializes the Agent
|
@@ -23,7 +23,7 @@ module Langchain::Agent
|
|
23
23
|
# @param llm [Object] The LLM client to use
|
24
24
|
# @param tools [Array] The tools to use
|
25
25
|
# @param max_iterations [Integer] The maximum number of iterations to run
|
26
|
-
# @return [
|
26
|
+
# @return [ReActAgent] The Agent::ReActAgent instance
|
27
27
|
def initialize(llm:, tools: [], max_iterations: 10)
|
28
28
|
Langchain::Tool::Base.validate_tools!(tools: tools)
|
29
29
|
|
@@ -117,7 +117,7 @@ module Langchain::Agent
|
|
117
117
|
# @return [PromptTemplate] PromptTemplate instance
|
118
118
|
def prompt_template
|
119
119
|
@template ||= Langchain::Prompt.load_from_path(
|
120
|
-
file_path: Langchain.root.join("langchain/agent/
|
120
|
+
file_path: Langchain.root.join("langchain/agent/react_agent/react_agent_prompt.yaml")
|
121
121
|
)
|
122
122
|
end
|
123
123
|
|
data/lib/langchain/data.rb
CHANGED
@@ -1,18 +1,26 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Langchain
|
4
|
+
# Abstraction for data loaded by a {Langchain::Loader}
|
4
5
|
class Data
|
6
|
+
# URL or Path of the data source
|
7
|
+
# @return [String]
|
5
8
|
attr_reader :source
|
6
9
|
|
10
|
+
# @param data [String] data that was loaded
|
11
|
+
# @option options [String] :source URL or Path of the data source
|
7
12
|
def initialize(data, options = {})
|
8
13
|
@source = options[:source]
|
9
14
|
@data = data
|
10
15
|
end
|
11
16
|
|
17
|
+
# @return [String]
|
12
18
|
def value
|
13
19
|
@data
|
14
20
|
end
|
15
21
|
|
22
|
+
# @param opts [Hash] options passed to the chunker
|
23
|
+
# @return [Array<String>]
|
16
24
|
def chunks(opts = {})
|
17
25
|
Langchain::Chunker::Text.new(@data, **opts).chunks
|
18
26
|
end
|
data/lib/langchain/llm/ai21.rb
CHANGED
@@ -5,17 +5,25 @@ module Langchain::LLM
|
|
5
5
|
# Wrapper around AI21 Studio APIs.
|
6
6
|
#
|
7
7
|
# Gem requirements:
|
8
|
-
# gem "ai21", "~> 0.2.
|
8
|
+
# gem "ai21", "~> 0.2.1"
|
9
9
|
#
|
10
10
|
# Usage:
|
11
11
|
# ai21 = Langchain::LLM::AI21.new(api_key:)
|
12
12
|
#
|
13
13
|
class AI21 < Base
|
14
|
-
|
14
|
+
DEFAULTS = {
|
15
|
+
temperature: 0.0,
|
16
|
+
model: "j2-ultra"
|
17
|
+
}.freeze
|
18
|
+
|
19
|
+
LENGTH_VALIDATOR = Langchain::Utils::TokenLength::AI21Validator
|
20
|
+
|
21
|
+
def initialize(api_key:, default_options: {})
|
15
22
|
depends_on "ai21"
|
16
23
|
require "ai21"
|
17
24
|
|
18
25
|
@client = ::AI21::Client.new(api_key)
|
26
|
+
@defaults = DEFAULTS.merge(default_options)
|
19
27
|
end
|
20
28
|
|
21
29
|
#
|
@@ -26,7 +34,11 @@ module Langchain::LLM
|
|
26
34
|
# @return [String] The completion
|
27
35
|
#
|
28
36
|
def complete(prompt:, **params)
|
29
|
-
|
37
|
+
parameters = complete_parameters params
|
38
|
+
|
39
|
+
parameters[:maxTokens] = LENGTH_VALIDATOR.validate_max_tokens!(prompt, parameters[:model], client)
|
40
|
+
|
41
|
+
response = client.complete(prompt, parameters)
|
30
42
|
response.dig(:completions, 0, :data, :text)
|
31
43
|
end
|
32
44
|
|
@@ -41,5 +53,11 @@ module Langchain::LLM
|
|
41
53
|
response = client.summarize(text, "TEXT", params)
|
42
54
|
response.dig(:summary)
|
43
55
|
end
|
56
|
+
|
57
|
+
private
|
58
|
+
|
59
|
+
def complete_parameters(params)
|
60
|
+
@defaults.dup.merge(params)
|
61
|
+
end
|
44
62
|
end
|
45
63
|
end
|
data/lib/langchain/llm/cohere.rb
CHANGED
@@ -13,16 +13,18 @@ module Langchain::LLM
|
|
13
13
|
class Cohere < Base
|
14
14
|
DEFAULTS = {
|
15
15
|
temperature: 0.0,
|
16
|
-
completion_model_name: "
|
16
|
+
completion_model_name: "command",
|
17
17
|
embeddings_model_name: "small",
|
18
|
-
dimension: 1024
|
18
|
+
dimension: 1024,
|
19
|
+
truncate: "START"
|
19
20
|
}.freeze
|
20
21
|
|
21
|
-
def initialize(api_key:)
|
22
|
+
def initialize(api_key:, default_options: {})
|
22
23
|
depends_on "cohere-ruby"
|
23
24
|
require "cohere"
|
24
25
|
|
25
26
|
@client = ::Cohere::Client.new(api_key: api_key)
|
27
|
+
@defaults = DEFAULTS.merge(default_options)
|
26
28
|
end
|
27
29
|
|
28
30
|
#
|
@@ -34,7 +36,7 @@ module Langchain::LLM
|
|
34
36
|
def embed(text:)
|
35
37
|
response = client.embed(
|
36
38
|
texts: [text],
|
37
|
-
model:
|
39
|
+
model: @defaults[:embeddings_model_name]
|
38
40
|
)
|
39
41
|
response.dig("embeddings").first
|
40
42
|
end
|
@@ -49,8 +51,9 @@ module Langchain::LLM
|
|
49
51
|
def complete(prompt:, **params)
|
50
52
|
default_params = {
|
51
53
|
prompt: prompt,
|
52
|
-
temperature:
|
53
|
-
model:
|
54
|
+
temperature: @defaults[:temperature],
|
55
|
+
model: @defaults[:completion_model_name],
|
56
|
+
truncate: @defaults[:truncate]
|
54
57
|
}
|
55
58
|
|
56
59
|
if params[:stop_sequences]
|
@@ -59,6 +62,8 @@ module Langchain::LLM
|
|
59
62
|
|
60
63
|
default_params.merge!(params)
|
61
64
|
|
65
|
+
default_params[:max_tokens] = Langchain::Utils::TokenLength::CohereValidator.validate_max_tokens!(prompt, default_params[:model], client)
|
66
|
+
|
62
67
|
response = client.generate(**default_params)
|
63
68
|
response.dig("generations").first.dig("text")
|
64
69
|
end
|
@@ -5,32 +5,27 @@ module Langchain::LLM
|
|
5
5
|
# Wrapper around the Google PaLM (Pathways Language Model) APIs: https://ai.google/build/machine-learning/
|
6
6
|
#
|
7
7
|
# Gem requirements:
|
8
|
-
# gem "google_palm_api", "~> 0.1.
|
8
|
+
# gem "google_palm_api", "~> 0.1.2"
|
9
9
|
#
|
10
10
|
# Usage:
|
11
11
|
# google_palm = Langchain::LLM::GooglePalm.new(api_key: "YOUR_API_KEY")
|
12
12
|
#
|
13
13
|
class GooglePalm < Base
|
14
|
-
#
|
15
|
-
# Wrapper around the Google PaLM (Pathways Language Model) APIs.
|
16
|
-
#
|
17
|
-
# Gem requirements: gem "google_palm_api", "~> 0.1.1"
|
18
|
-
#
|
19
|
-
# Usage:
|
20
|
-
# google_palm = Langchain::LLM::GooglePalm.new(api_key: "YOUR_API_KEY")
|
21
|
-
#
|
22
|
-
|
23
14
|
DEFAULTS = {
|
24
15
|
temperature: 0.0,
|
25
|
-
dimension: 768 # This is what the `embedding-gecko-001` model generates
|
16
|
+
dimension: 768, # This is what the `embedding-gecko-001` model generates
|
17
|
+
completion_model_name: "text-bison-001",
|
18
|
+
chat_completion_model_name: "chat-bison-001",
|
19
|
+
embeddings_model_name: "embedding-gecko-001"
|
26
20
|
}.freeze
|
27
21
|
LENGTH_VALIDATOR = Langchain::Utils::TokenLength::GooglePalmValidator
|
28
22
|
|
29
|
-
def initialize(api_key:)
|
23
|
+
def initialize(api_key:, default_options: {})
|
30
24
|
depends_on "google_palm_api"
|
31
25
|
require "google_palm_api"
|
32
26
|
|
33
27
|
@client = ::GooglePalmApi::Client.new(api_key: api_key)
|
28
|
+
@defaults = DEFAULTS.merge(default_options)
|
34
29
|
end
|
35
30
|
|
36
31
|
#
|
@@ -56,7 +51,8 @@ module Langchain::LLM
|
|
56
51
|
def complete(prompt:, **params)
|
57
52
|
default_params = {
|
58
53
|
prompt: prompt,
|
59
|
-
temperature:
|
54
|
+
temperature: @defaults[:temperature],
|
55
|
+
model: @defaults[:completion_model_name]
|
60
56
|
}
|
61
57
|
|
62
58
|
if params[:stop_sequences]
|
@@ -85,12 +81,14 @@ module Langchain::LLM
|
|
85
81
|
raise ArgumentError.new(":prompt or :messages argument is expected") if prompt.empty? && messages.empty?
|
86
82
|
|
87
83
|
default_params = {
|
88
|
-
temperature:
|
84
|
+
temperature: @defaults[:temperature],
|
85
|
+
model: @defaults[:chat_completion_model_name],
|
89
86
|
context: context,
|
90
87
|
messages: compose_chat_messages(prompt: prompt, messages: messages),
|
91
88
|
examples: compose_examples(examples)
|
92
89
|
}
|
93
90
|
|
91
|
+
# chat-bison-001 is the only model that currently supports countMessageTokens functions
|
94
92
|
LENGTH_VALIDATOR.validate_max_tokens!(default_params[:messages], "chat-bison-001", llm: self)
|
95
93
|
|
96
94
|
if options[:stop_sequences]
|
@@ -123,7 +121,7 @@ module Langchain::LLM
|
|
123
121
|
|
124
122
|
complete(
|
125
123
|
prompt: prompt,
|
126
|
-
temperature:
|
124
|
+
temperature: @defaults[:temperature],
|
127
125
|
# Most models have a context length of 2048 tokens (except for the newest models, which support 4096).
|
128
126
|
max_tokens: 2048
|
129
127
|
)
|
data/lib/langchain/llm/openai.rb
CHANGED
@@ -19,11 +19,12 @@ module Langchain::LLM
|
|
19
19
|
}.freeze
|
20
20
|
LENGTH_VALIDATOR = Langchain::Utils::TokenLength::OpenAIValidator
|
21
21
|
|
22
|
-
def initialize(api_key:, llm_options: {})
|
22
|
+
def initialize(api_key:, llm_options: {}, default_options: {})
|
23
23
|
depends_on "ruby-openai"
|
24
24
|
require "openai"
|
25
25
|
|
26
26
|
@client = ::OpenAI::Client.new(access_token: api_key, **llm_options)
|
27
|
+
@defaults = DEFAULTS.merge(default_options)
|
27
28
|
end
|
28
29
|
|
29
30
|
#
|
@@ -34,7 +35,7 @@ module Langchain::LLM
|
|
34
35
|
# @return [Array] The embedding
|
35
36
|
#
|
36
37
|
def embed(text:, **params)
|
37
|
-
parameters = {model:
|
38
|
+
parameters = {model: @defaults[:embeddings_model_name], input: text}
|
38
39
|
|
39
40
|
validate_max_tokens(text, parameters[:model])
|
40
41
|
|
@@ -50,7 +51,7 @@ module Langchain::LLM
|
|
50
51
|
# @return [String] The completion
|
51
52
|
#
|
52
53
|
def complete(prompt:, **params)
|
53
|
-
parameters = compose_parameters
|
54
|
+
parameters = compose_parameters @defaults[:completion_model_name], params
|
54
55
|
|
55
56
|
parameters[:prompt] = prompt
|
56
57
|
parameters[:max_tokens] = validate_max_tokens(prompt, parameters[:model])
|
@@ -60,20 +61,59 @@ module Langchain::LLM
|
|
60
61
|
end
|
61
62
|
|
62
63
|
#
|
63
|
-
# Generate a chat completion for a given prompt
|
64
|
+
# Generate a chat completion for a given prompt or messages.
|
65
|
+
#
|
66
|
+
# == Examples
|
67
|
+
#
|
68
|
+
# # simplest case, just give a prompt
|
69
|
+
# openai.chat prompt: "When was Ruby first released?"
|
70
|
+
#
|
71
|
+
# # prompt plus some context about how to respond
|
72
|
+
# openai.chat context: "You are RubyGPT, a helpful chat bot for helping people learn Ruby", prompt: "Does Ruby have a REPL like IPython?"
|
73
|
+
#
|
74
|
+
# # full control over messages that get sent, equivilent to the above
|
75
|
+
# openai.chat messages: [
|
76
|
+
# {
|
77
|
+
# role: "system",
|
78
|
+
# content: "You are RubyGPT, a helpful chat bot for helping people learn Ruby", prompt: "Does Ruby have a REPL like IPython?"
|
79
|
+
# },
|
80
|
+
# {
|
81
|
+
# role: "user",
|
82
|
+
# content: "When was Ruby first released?"
|
83
|
+
# }
|
84
|
+
# ]
|
85
|
+
#
|
86
|
+
# # few-short prompting with examples
|
87
|
+
# openai.chat prompt: "When was factory_bot released?",
|
88
|
+
# examples: [
|
89
|
+
# {
|
90
|
+
# role: "user",
|
91
|
+
# content: "When was Ruby on Rails released?"
|
92
|
+
# }
|
93
|
+
# {
|
94
|
+
# role: "assistant",
|
95
|
+
# content: "2004"
|
96
|
+
# },
|
97
|
+
# ]
|
64
98
|
#
|
65
99
|
# @param prompt [String] The prompt to generate a chat completion for
|
66
|
-
# @param messages [Array] The messages that have been sent in the conversation
|
67
|
-
#
|
68
|
-
#
|
69
|
-
#
|
70
|
-
# @param
|
100
|
+
# @param messages [Array<Hash>] The messages that have been sent in the conversation
|
101
|
+
# Each message should be a Hash with the following keys:
|
102
|
+
# - :content [String] The content of the message
|
103
|
+
# - :role [String] The role of the sender (system, user, assistant, or function)
|
104
|
+
# @param context [String] An initial context to provide as a system message, ie "You are RubyGPT, a helpful chat bot for helping people learn Ruby"
|
105
|
+
# @param examples [Array<Hash>] Examples of messages to provide to the model. Useful for Few-Shot Prompting
|
106
|
+
# Each message should be a Hash with the following keys:
|
107
|
+
# - :content [String] The content of the message
|
108
|
+
# - :role [String] The role of the sender (system, user, assistant, or function)
|
109
|
+
# @param options <Hash> extra parameters passed to OpenAI::Client#chat
|
110
|
+
# @yield [String] Stream responses back one String at a time
|
71
111
|
# @return [String] The chat completion
|
72
112
|
#
|
73
113
|
def chat(prompt: "", messages: [], context: "", examples: [], **options)
|
74
114
|
raise ArgumentError.new(":prompt or :messages argument is expected") if prompt.empty? && messages.empty?
|
75
115
|
|
76
|
-
parameters = compose_parameters
|
116
|
+
parameters = compose_parameters @defaults[:chat_completion_model_name], options
|
77
117
|
parameters[:messages] = compose_chat_messages(prompt: prompt, messages: messages, context: context, examples: examples)
|
78
118
|
parameters[:max_tokens] = validate_max_tokens(parameters[:messages], parameters[:model])
|
79
119
|
|
@@ -104,13 +144,13 @@ module Langchain::LLM
|
|
104
144
|
)
|
105
145
|
prompt = prompt_template.format(text: text)
|
106
146
|
|
107
|
-
complete(prompt: prompt, temperature:
|
147
|
+
complete(prompt: prompt, temperature: @defaults[:temperature])
|
108
148
|
end
|
109
149
|
|
110
150
|
private
|
111
151
|
|
112
152
|
def compose_parameters(model, params)
|
113
|
-
default_params = {model: model, temperature:
|
153
|
+
default_params = {model: model, temperature: @defaults[:temperature]}
|
114
154
|
|
115
155
|
default_params[:stop] = params.delete(:stop_sequences) if params[:stop_sequences]
|
116
156
|
|
@@ -32,7 +32,7 @@ module Langchain::LLM
|
|
32
32
|
#
|
33
33
|
# @param api_key [String] The API key to use
|
34
34
|
#
|
35
|
-
def initialize(api_key:)
|
35
|
+
def initialize(api_key:, default_options: {})
|
36
36
|
depends_on "replicate-ruby"
|
37
37
|
require "replicate"
|
38
38
|
|
@@ -41,6 +41,7 @@ module Langchain::LLM
|
|
41
41
|
end
|
42
42
|
|
43
43
|
@client = ::Replicate.client
|
44
|
+
@defaults = DEFAULTS.merge(default_options)
|
44
45
|
end
|
45
46
|
|
46
47
|
#
|
@@ -100,7 +101,7 @@ module Langchain::LLM
|
|
100
101
|
|
101
102
|
complete(
|
102
103
|
prompt: prompt,
|
103
|
-
temperature:
|
104
|
+
temperature: @defaults[:temperature],
|
104
105
|
# Most models have a context length of 2048 tokens (except for the newest models, which support 4096).
|
105
106
|
max_tokens: 2048
|
106
107
|
)
|
@@ -111,11 +112,11 @@ module Langchain::LLM
|
|
111
112
|
private
|
112
113
|
|
113
114
|
def completion_model
|
114
|
-
@completion_model ||= client.retrieve_model(
|
115
|
+
@completion_model ||= client.retrieve_model(@defaults[:completion_model_name]).latest_version
|
115
116
|
end
|
116
117
|
|
117
118
|
def embeddings_model
|
118
|
-
@embeddings_model ||= client.retrieve_model(
|
119
|
+
@embeddings_model ||= client.retrieve_model(@defaults[:embeddings_model_name]).latest_version
|
119
120
|
end
|
120
121
|
end
|
121
122
|
end
|
data/lib/langchain/loader.rb
CHANGED
@@ -10,32 +10,64 @@ module Langchain
|
|
10
10
|
|
11
11
|
URI_REGEX = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
|
12
12
|
|
13
|
-
# Load data from a file or
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
13
|
+
# Load data from a file or URL. Shorthand for `Langchain::Loader.new(path).load`
|
14
|
+
#
|
15
|
+
# == Examples
|
16
|
+
#
|
17
|
+
# # load a URL
|
18
|
+
# data = Langchain::Loader.load("https://example.com/docs/README.md")
|
19
|
+
#
|
20
|
+
# # load a file
|
21
|
+
# data = Langchain::Loader.load("README.md")
|
22
|
+
#
|
23
|
+
# # Load data using a custom processor
|
24
|
+
# data = Langchain::Loader.load("README.md") do |raw_data, options|
|
25
|
+
# # your processing code goes here
|
26
|
+
# # return data at the end here
|
27
|
+
# end
|
28
|
+
#
|
29
|
+
# @param path [String | Pathname] path to file or URL
|
30
|
+
# @param options [Hash] options passed to the processor class used to process the data
|
31
|
+
# @return [Data] data loaded from path
|
17
32
|
def self.load(path, options = {}, &block)
|
18
33
|
new(path, options).load(&block)
|
19
34
|
end
|
20
35
|
|
21
36
|
# Initialize Langchain::Loader
|
22
|
-
# @param path [String | Pathname] path to file or
|
37
|
+
# @param path [String | Pathname] path to file or URL
|
38
|
+
# @param options [Hash] options passed to the processor class used to process the data
|
23
39
|
# @return [Langchain::Loader] loader instance
|
24
40
|
def initialize(path, options = {})
|
25
41
|
@options = options
|
26
42
|
@path = path
|
27
43
|
end
|
28
44
|
|
29
|
-
#
|
30
|
-
#
|
45
|
+
# Is the path a URL?
|
46
|
+
#
|
47
|
+
# @return [Boolean] true if path is URL
|
31
48
|
def url?
|
32
49
|
return false if @path.is_a?(Pathname)
|
33
50
|
|
34
51
|
!!(@path =~ URI_REGEX)
|
35
52
|
end
|
36
53
|
|
37
|
-
# Load data from a file or
|
38
|
-
#
|
54
|
+
# Load data from a file or URL
|
55
|
+
#
|
56
|
+
# loader = Langchain::Loader.new("README.md")
|
57
|
+
# # Load data using default processor for the file
|
58
|
+
# loader.load
|
59
|
+
#
|
60
|
+
# # Load data using a custom processor
|
61
|
+
# loader.load do |raw_data, options|
|
62
|
+
# # your processing code goes here
|
63
|
+
# # return data at the end here
|
64
|
+
# end
|
65
|
+
#
|
66
|
+
# @yield [String, Hash] handle parsing raw output into string directly
|
67
|
+
# @yieldparam [String] raw_data from the loaded URL or file
|
68
|
+
# @yieldreturn [String] parsed data, as a String
|
69
|
+
#
|
70
|
+
# @return [Data] data that was loaded
|
39
71
|
def load(&block)
|
40
72
|
@raw_data = url? ? load_from_url : load_from_path
|
41
73
|
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true

module Langchain::OutputParsers
  # = Output Parsers
  #
  # Abstract base class for parsers that turn raw LLM text output into
  # structured data.
  #
  # @abstract Subclass and override {#parse} and {#get_format_instructions}.
  class Base
    #
    # Parse the output of an LLM call.
    #
    # @param text [String] LLM output to parse.
    # @return [Object] Parsed output.
    # @raise [NotImplementedError] subclasses must implement this method.
    #
    def parse(text:)
      raise NotImplementedError
    end

    #
    # Return a string describing the format of the output, e.g.:
    #
    #   ```json
    #   {
    #     "foo": "bar"
    #   }
    #   ```
    #
    # @return [String] Format instructions.
    # @raise [NotImplementedError] subclasses must implement this method.
    #
    def get_format_instructions
      raise NotImplementedError
    end
  end

  # Raised when an output parser fails to parse or validate LLM output.
  # Carries both the failure message and the offending text.
  class OutputParserException < StandardError
    # @param message [String] description of the failure
    # @param text [String] the raw LLM output that failed to parse
    def initialize(message, text)
      # Forward the message to StandardError so Exception#message,
      # #full_message and marshaling behave correctly (it was previously
      # stored only in an ivar and never passed to super).
      super(message)
      @message = message
      @text = text
    end

    def to_s
      "#{@message}\nText: #{@text}"
    end
  end
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# frozen_string_literal: true

require "json"
require "json-schema"

module Langchain::OutputParsers
  # = Structured Output Parser
  #
  # Parses LLM output into an object validated against a JSON schema.
  class StructuredOutputParser < Base
    attr_reader :schema

    #
    # Builds a parser bound to the given JSON schema.
    #
    # @param schema [JSON::Schema] The json schema
    # @raise [ArgumentError] when the schema itself is invalid
    #
    def initialize(schema:)
      @schema = validate_schema!(schema)
    end

    # @return [Hash] serializable representation of this parser
    def to_h
      {_type: "StructuredOutputParser", schema: schema.to_json}
    end

    #
    # Convenience constructor from a JSON::Schema.
    #
    # @param schema [JSON::Schema] The JSON::Schema to use
    # @return [StructuredOutputParser] A new instance of the class
    #
    def self.from_json_schema(schema)
      new(schema: schema)
    end

    #
    # Instructions telling a language model how to format its output so
    # that it conforms to @schema.
    #
    # @return [String] formatting instructions, including the schema itself
    #
    def get_format_instructions
      <<~INSTRUCTIONS
        You must format your output as a JSON value that adheres to a given "JSON Schema" instance.

        "JSON Schema" is a declarative language that allows you to annotate and validate JSON documents.

        For example, the example "JSON Schema" instance {"properties": {"foo": {"description": "a list of test words", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}}
        would match an object with one required property, "foo". The "type" property specifies "foo" must be an "array", and the "description" property semantically describes it as "a list of test words". The items within "foo" must be strings.
        Thus, the object {"foo": ["bar", "baz"]} is a well-formatted instance of this example "JSON Schema". The object {"properties": {"foo": ["bar", "baz"]}}} is not well-formatted.

        Your output will be parsed and type-checked according to the provided schema instance, so make sure all fields in your output match the schema exactly and there are no trailing commas!

        Here is the JSON Schema instance your output must adhere to. Include the enclosing markdown codeblock:
        ```json
        #{schema.to_json}
        ```
      INSTRUCTIONS
    end

    #
    # Parse the output of an LLM call, extracting an object that abides
    # by @schema.
    #
    # @param text [String] Text output from the LLM call
    # @return [Object] object that abides by the @schema
    # @raise [OutputParserException] when parsing or validation fails
    #
    def parse(text)
      candidate = extract_json(text)
      result = JSON.parse(candidate)
      JSON::Validator.validate!(schema, result)
      result
    rescue => e
      raise OutputParserException.new("Failed to parse. Text: \"#{text}\". Error: #{e}", text)
    end

    private

    # Returns the payload between markdown ``` fences when present,
    # otherwise the whole trimmed text.
    def extract_json(text)
      stripped = text.strip
      return stripped unless text.include?("```")

      stripped.split(/```(?:json)?/)[1]
    end

    # Ensures the schema itself is a valid JSON Schema; returns it unchanged.
    def validate_schema!(schema)
      problems = JSON::Validator.fully_validate_schema(schema)
      raise ArgumentError, "Invalid schema: \n#{problems.join("\n")}" unless problems.empty?

      schema
    end
  end
end
|
data/lib/langchain/tool/base.rb
CHANGED
@@ -27,7 +27,7 @@ module Langchain::Tool
|
|
27
27
|
#
|
28
28
|
# 3. Pass the tools when Agent is instantiated.
|
29
29
|
#
|
30
|
-
# agent = Langchain::Agent::
|
30
|
+
# agent = Langchain::Agent::ReActAgent.new(
|
31
31
|
# llm: :openai, # or :cohere, :hugging_face, :google_palm or :replicate
|
32
32
|
# llm_api_key: ENV["OPENAI_API_KEY"],
|
33
33
|
# tools: ["google_search", "calculator", "wikipedia"]
|
@@ -16,6 +16,11 @@ module Langchain::Tool
|
|
16
16
|
Useful for getting the result of a math expression.
|
17
17
|
|
18
18
|
The input to this tool should be a valid mathematical expression that could be executed by a simple calculator.
|
19
|
+
Usage:
|
20
|
+
Action Input: 1 + 1
|
21
|
+
Action Input: 3 * 2 / 4
|
22
|
+
Action Input: 9 - 7
|
23
|
+
Action Input: (4.1 + 2.3) / (2.0 - 5.6) * 3
|
19
24
|
DESC
|
20
25
|
|
21
26
|
def initialize
|
@@ -32,14 +37,7 @@ module Langchain::Tool
|
|
32
37
|
|
33
38
|
Eqn::Calculator.calc(input)
|
34
39
|
rescue Eqn::ParseError, Eqn::NoVariableValueError
|
35
|
-
#
|
36
|
-
# We can use the google answer box to evaluate this expression
|
37
|
-
# TODO: Figure out to find a better way to evaluate these language expressions.
|
38
|
-
hash_results = Langchain::Tool::GoogleSearch
|
39
|
-
.new(api_key: ENV["SERPAPI_API_KEY"])
|
40
|
-
.execute_search(input: input)
|
41
|
-
hash_results.dig(:answer_box, :to) ||
|
42
|
-
hash_results.dig(:answer_box, :result)
|
40
|
+
"\"#{input}\" is an invalid mathematical expression"
|
43
41
|
end
|
44
42
|
end
|
45
43
|
end
|
@@ -14,23 +14,26 @@ module Langchain::Tool
|
|
14
14
|
The input to this tool should be valid SQL.
|
15
15
|
DESC
|
16
16
|
|
17
|
-
attr_reader :db
|
17
|
+
attr_reader :db, :requested_tables, :excluded_tables
|
18
18
|
|
19
19
|
#
|
20
20
|
# Establish a database connection
|
21
21
|
#
|
22
22
|
# @param connection_string [String] Database connection info, e.g. 'postgres://user:password@localhost:5432/db_name'
|
23
|
+
# @param tables [Array<Symbol>] The tables to use. Will use all if empty.
|
24
|
+
# @param exclude_tables [Array<Symbol>] The tables to exclude. Will exclude none if empty.
|
25
|
+
|
23
26
|
# @return [Database] Database object
|
24
27
|
#
|
25
|
-
def initialize(connection_string:)
|
28
|
+
# Establish a database connection.
#
# @param connection_string [String] Database connection info, e.g. 'postgres://user:password@localhost:5432/db_name'
# @param tables [Array<Symbol>] The tables to use. Will use all if empty.
# @param exclude_tables [Array<Symbol>] The tables to exclude. Will exclude none if empty.
# @raise [StandardError] when the connection string is nil or blank
def initialize(connection_string:, tables: [], exclude_tables: [])
  depends_on "sequel"
  require "sequel"

  # Guard against both nil and "" so a missing value fails with a clear
  # error instead of a NoMethodError on nil.
  raise StandardError, "connection_string parameter cannot be blank" if connection_string.to_s.empty?

  @db = Sequel.connect(connection_string)
  @requested_tables = tables
  @excluded_tables = exclude_tables
end
|
35
38
|
|
36
39
|
#
|
@@ -38,9 +41,37 @@ module Langchain::Tool
|
|
38
41
|
#
|
39
42
|
# @return [String] schema
|
40
43
|
#
|
41
|
-
def
|
42
|
-
Langchain.logger.info("Dumping schema", for: self.class)
|
43
|
-
|
44
|
+
def dump_schema
  Langchain.logger.info("Dumping schema tables and keys", for: self.class)
  schema = ""
  db.tables.each do |table|
    next if excluded_tables.include?(table)
    next unless requested_tables.empty? || requested_tables.include?(table)

    # Hoist the lookups: each db.schema/db.foreign_key_list call may
    # round-trip to the database, and they were invoked inside the loop.
    columns = db.schema(table)
    foreign_keys = db.foreign_key_list(table)

    primary_key_columns = []
    primary_key_column_count = columns.count { |column| column[1][:primary_key] == true }

    schema << "CREATE TABLE #{table}(\n"
    columns.each do |column|
      schema << "#{column[0]} #{column[1][:type]}"
      if column[1][:primary_key] == true
        # A single-column key is declared inline; a composite key is
        # collected and emitted as one PRIMARY KEY (...) clause below.
        # (Previously the NON-key columns were collected, producing a
        # composite key over the wrong columns.)
        schema << " PRIMARY KEY" if primary_key_column_count == 1
        primary_key_columns << column[0] if primary_key_column_count > 1
      end
      schema << ",\n" unless column == columns.last && primary_key_column_count == 1
    end
    schema << "PRIMARY KEY (#{primary_key_columns.join(",")})" if primary_key_column_count > 1
    foreign_keys.each do |fk|
      schema << ",\n" if fk == foreign_keys.first
      schema << "FOREIGN KEY (#{fk[:columns][0]}) REFERENCES #{fk[:table]}(#{fk[:key][0]})"
      schema << ",\n" unless fk == foreign_keys.last
    end
    schema << ");\n"
  end
  schema
end
|
45
76
|
|
46
77
|
#
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true

module Langchain
  module Utils
    module TokenLength
      #
      # Validates the length of text sent to AI21's API before the
      # request is made, using the per-model token limits below.
      #
      class AI21Validator < BaseValidator
        TOKEN_LIMITS = {
          "j2-ultra" => 8192,
          "j2-mid" => 8192,
          "j2-light" => 8192
        }.freeze

        #
        # Calculate token length for a given text and model name
        #
        # @param text [String] The text to calculate the token length for
        # @param model_name [String] The model name to validate against
        # @param client [Object] AI21 client used to tokenize the text
        # @return [Integer] The token length of the text
        #
        def self.token_length(text, model_name, client)
          response = client.tokenize(text)
          tokens = response.dig(:tokens)
          tokens.length
        end

        # @param model_name [String] model to look up
        # @return [Integer, nil] maximum tokens for the model, nil when unknown
        def self.token_limit(model_name)
          TOKEN_LIMITS.fetch(model_name, nil)
        end
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true

module Langchain
  module Utils
    module TokenLength
      #
      # Validates the length of text sent to Cohere's API before the
      # request is made, using the per-model token limits below.
      #
      class CohereValidator < BaseValidator
        TOKEN_LIMITS = {
          # Source:
          # https://docs.cohere.com/docs/models
          "command-light" => 4096,
          "command" => 4096,
          "base-light" => 2048,
          "base" => 2048,
          "embed-english-light-v2.0" => 512,
          "embed-english-v2.0" => 512,
          "embed-multilingual-v2.0" => 256,
          "summarize-medium" => 2048,
          "summarize-xlarge" => 2048
        }.freeze

        #
        # Calculate token length for a given text and model name
        #
        # @param text [String] The text to calculate the token length for
        # @param model_name [String] The model name to validate against
        # @param client [Object] Cohere client used to tokenize the text
        # @return [Integer] The token length of the text
        #
        def self.token_length(text, model_name, client)
          response = client.tokenize(text: text)
          response["tokens"].length
        end

        # @param model_name [String] model to look up
        # @return [Integer, nil] maximum tokens for the model, nil when unknown
        def self.token_limit(model_name)
          TOKEN_LIMITS.fetch(model_name, nil)
        end
      end
    end
  end
end
|
data/lib/langchain/version.rb
CHANGED
data/lib/langchain.rb
CHANGED
@@ -74,7 +74,7 @@ module Langchain
|
|
74
74
|
|
75
75
|
module Agent
|
76
76
|
autoload :Base, "langchain/agent/base"
|
77
|
-
autoload :
|
77
|
+
autoload :ReActAgent, "langchain/agent/react_agent/react_agent.rb"
|
78
78
|
autoload :SQLQueryAgent, "langchain/agent/sql_query_agent/sql_query_agent.rb"
|
79
79
|
end
|
80
80
|
|
@@ -108,9 +108,11 @@ module Langchain
|
|
108
108
|
module Utils
|
109
109
|
module TokenLength
|
110
110
|
autoload :BaseValidator, "langchain/utils/token_length/base_validator"
|
111
|
-
autoload :
|
112
|
-
autoload :
|
111
|
+
autoload :AI21Validator, "langchain/utils/token_length/ai21_validator"
|
112
|
+
autoload :CohereValidator, "langchain/utils/token_length/cohere_validator"
|
113
113
|
autoload :GooglePalmValidator, "langchain/utils/token_length/google_palm_validator"
|
114
|
+
autoload :OpenAIValidator, "langchain/utils/token_length/openai_validator"
|
115
|
+
autoload :TokenLimitExceeded, "langchain/utils/token_length/token_limit_exceeded"
|
114
116
|
end
|
115
117
|
end
|
116
118
|
|
@@ -143,6 +145,11 @@ module Langchain
|
|
143
145
|
autoload :FewShotPromptTemplate, "langchain/prompt/few_shot_prompt_template"
|
144
146
|
end
|
145
147
|
|
148
|
+
module OutputParsers
|
149
|
+
autoload :Base, "langchain/output_parsers/base"
|
150
|
+
autoload :StructuredOutputParser, "langchain/output_parsers/structured"
|
151
|
+
end
|
152
|
+
|
146
153
|
module Errors
|
147
154
|
class BaseError < StandardError; end
|
148
155
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-06-
|
11
|
+
date: 2023-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: baran
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 0.0.5
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: json-schema
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 4.0.0
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 4.0.0
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: dotenv-rails
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -114,14 +128,14 @@ dependencies:
|
|
114
128
|
requirements:
|
115
129
|
- - "~>"
|
116
130
|
- !ruby/object:Gem::Version
|
117
|
-
version: 0.2.
|
131
|
+
version: 0.2.1
|
118
132
|
type: :development
|
119
133
|
prerelease: false
|
120
134
|
version_requirements: !ruby/object:Gem::Requirement
|
121
135
|
requirements:
|
122
136
|
- - "~>"
|
123
137
|
- !ruby/object:Gem::Version
|
124
|
-
version: 0.2.
|
138
|
+
version: 0.2.1
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
name: chroma-db
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -142,14 +156,14 @@ dependencies:
|
|
142
156
|
requirements:
|
143
157
|
- - "~>"
|
144
158
|
- !ruby/object:Gem::Version
|
145
|
-
version: 0.9.
|
159
|
+
version: 0.9.5
|
146
160
|
type: :development
|
147
161
|
prerelease: false
|
148
162
|
version_requirements: !ruby/object:Gem::Requirement
|
149
163
|
requirements:
|
150
164
|
- - "~>"
|
151
165
|
- !ruby/object:Gem::Version
|
152
|
-
version: 0.9.
|
166
|
+
version: 0.9.5
|
153
167
|
- !ruby/object:Gem::Dependency
|
154
168
|
name: docx
|
155
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -184,14 +198,14 @@ dependencies:
|
|
184
198
|
requirements:
|
185
199
|
- - "~>"
|
186
200
|
- !ruby/object:Gem::Version
|
187
|
-
version: 0.1.
|
201
|
+
version: 0.1.2
|
188
202
|
type: :development
|
189
203
|
prerelease: false
|
190
204
|
version_requirements: !ruby/object:Gem::Requirement
|
191
205
|
requirements:
|
192
206
|
- - "~>"
|
193
207
|
- !ruby/object:Gem::Version
|
194
|
-
version: 0.1.
|
208
|
+
version: 0.1.2
|
195
209
|
- !ruby/object:Gem::Dependency
|
196
210
|
name: google_search_results
|
197
211
|
requirement: !ruby/object:Gem::Requirement
|
@@ -462,6 +476,7 @@ files:
|
|
462
476
|
- Rakefile
|
463
477
|
- examples/create_and_manage_few_shot_prompt_templates.rb
|
464
478
|
- examples/create_and_manage_prompt_templates.rb
|
479
|
+
- examples/create_and_manage_prompt_templates_using_structured_output_parser.rb
|
465
480
|
- examples/pdf_store_and_query_with_chroma.rb
|
466
481
|
- examples/store_and_query_with_pinecone.rb
|
467
482
|
- examples/store_and_query_with_qdrant.rb
|
@@ -469,8 +484,8 @@ files:
|
|
469
484
|
- lefthook.yml
|
470
485
|
- lib/langchain.rb
|
471
486
|
- lib/langchain/agent/base.rb
|
472
|
-
- lib/langchain/agent/
|
473
|
-
- lib/langchain/agent/
|
487
|
+
- lib/langchain/agent/react_agent/react_agent.rb
|
488
|
+
- lib/langchain/agent/react_agent/react_agent_prompt.yaml
|
474
489
|
- lib/langchain/agent/sql_query_agent/sql_query_agent.rb
|
475
490
|
- lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml
|
476
491
|
- lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml
|
@@ -489,6 +504,8 @@ files:
|
|
489
504
|
- lib/langchain/llm/prompts/summarize_template.yaml
|
490
505
|
- lib/langchain/llm/replicate.rb
|
491
506
|
- lib/langchain/loader.rb
|
507
|
+
- lib/langchain/output_parsers/base.rb
|
508
|
+
- lib/langchain/output_parsers/structured.rb
|
492
509
|
- lib/langchain/processors/base.rb
|
493
510
|
- lib/langchain/processors/csv.rb
|
494
511
|
- lib/langchain/processors/docx.rb
|
@@ -509,7 +526,9 @@ files:
|
|
509
526
|
- lib/langchain/tool/ruby_code_interpreter.rb
|
510
527
|
- lib/langchain/tool/weather.rb
|
511
528
|
- lib/langchain/tool/wikipedia.rb
|
529
|
+
- lib/langchain/utils/token_length/ai21_validator.rb
|
512
530
|
- lib/langchain/utils/token_length/base_validator.rb
|
531
|
+
- lib/langchain/utils/token_length/cohere_validator.rb
|
513
532
|
- lib/langchain/utils/token_length/google_palm_validator.rb
|
514
533
|
- lib/langchain/utils/token_length/openai_validator.rb
|
515
534
|
- lib/langchain/utils/token_length/token_limit_exceeded.rb
|