langchainrb 0.5.7 → 0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/Gemfile.lock +11 -8
- data/README.md +5 -5
- data/examples/create_and_manage_prompt_templates_using_structured_output_parser.rb +104 -0
- data/lib/langchain/active_record/hooks.rb +96 -0
- data/lib/langchain/agent/base.rb +1 -1
- data/lib/langchain/agent/{chain_of_thought_agent/chain_of_thought_agent.rb → react_agent/react_agent.rb} +5 -5
- data/lib/langchain/data.rb +8 -0
- data/lib/langchain/llm/ai21.rb +6 -2
- data/lib/langchain/llm/cohere.rb +7 -3
- data/lib/langchain/llm/google_palm.rb +3 -12
- data/lib/langchain/loader.rb +41 -9
- data/lib/langchain/output_parsers/base.rb +45 -0
- data/lib/langchain/output_parsers/structured.rb +91 -0
- data/lib/langchain/railtie.rb +11 -0
- data/lib/langchain/tool/base.rb +1 -1
- data/lib/langchain/tool/database.rb +20 -10
- data/lib/langchain/utils/token_length/ai21_validator.rb +36 -0
- data/lib/langchain/utils/token_length/cohere_validator.rb +44 -0
- data/lib/langchain/vectorsearch/weaviate.rb +59 -17
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +16 -3
- metadata +31 -10
- /data/lib/langchain/agent/{chain_of_thought_agent/chain_of_thought_agent_prompt.yaml → react_agent/react_agent_prompt.yaml} +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5a6f4e8bb8ecaba6ff4d53bba384bd6338012429a69a0dc7df0a58a476763e7e
|
4
|
+
data.tar.gz: 92211a22fca9664831cf4f395a53dedddafc339ab419780932398c07256b737d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b5c84f0a9a54f51799c5318cba243457fcfd6f026c71b8f34e58cf60172d476963f25ea8d24c49b35ed93c893adb9e2844443a22dd9e927ab16318850a11419a
|
7
|
+
data.tar.gz: 4664927203ea032f737000c27ec5fa04c96ab606ec8377b4673b48638905b458077d4ab3cb7727fcb98be6c607a37bd318395fd96000a734de213c7d9041a219
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.6.1] - 2023-06-24
|
4
|
+
- Adding support to hook vectorsearch into ActiveRecord models
|
5
|
+
|
6
|
+
## [0.6.0] - 2023-06-22
|
7
|
+
- [BREAKING] Rename `ChainOfThoughtAgent` to `ReActAgent`
|
8
|
+
- Implement AI21 token validator
|
9
|
+
- Add `Langchain::OutputParsers`
|
10
|
+
|
3
11
|
## [0.5.7] - 2023-06-19
|
4
12
|
- Developer can modify models used when initializing `Langchain::LLM::*` clients
|
5
13
|
- Improvements to the `SQLQueryAgent` and the database tool
|
data/Gemfile.lock
CHANGED
@@ -1,9 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
langchainrb (0.5.7)
|
4
|
+
langchainrb (0.6.1)
|
5
5
|
baran (~> 0.1.6)
|
6
6
|
colorize (~> 0.8.1)
|
7
|
+
json-schema (~> 4.0.0)
|
7
8
|
tiktoken_ruby (~> 0.0.5)
|
8
9
|
|
9
10
|
GEM
|
@@ -31,7 +32,7 @@ GEM
|
|
31
32
|
addressable (2.8.4)
|
32
33
|
public_suffix (>= 2.0.2, < 6.0)
|
33
34
|
afm (0.2.2)
|
34
|
-
ai21 (0.2.
|
35
|
+
ai21 (0.2.1)
|
35
36
|
ast (2.4.2)
|
36
37
|
baran (0.1.6)
|
37
38
|
builder (3.2.4)
|
@@ -41,7 +42,7 @@ GEM
|
|
41
42
|
dry-monads (~> 1.6)
|
42
43
|
ruby-next-core (>= 0.15.0)
|
43
44
|
coderay (1.1.3)
|
44
|
-
cohere-ruby (0.9.
|
45
|
+
cohere-ruby (0.9.5)
|
45
46
|
faraday (>= 1.0.0)
|
46
47
|
faraday_middleware (>= 1.0.0)
|
47
48
|
colorize (0.8.1)
|
@@ -124,7 +125,7 @@ GEM
|
|
124
125
|
faraday-retry (1.0.3)
|
125
126
|
faraday_middleware (1.2.0)
|
126
127
|
faraday (~> 1.0)
|
127
|
-
google_palm_api (0.1.
|
128
|
+
google_palm_api (0.1.2)
|
128
129
|
faraday (>= 1.0.0)
|
129
130
|
faraday_middleware (>= 1.0.0)
|
130
131
|
google_search_results (2.0.1)
|
@@ -148,6 +149,8 @@ GEM
|
|
148
149
|
concurrent-ruby (~> 1.0)
|
149
150
|
ice_nine (0.11.2)
|
150
151
|
json (2.6.3)
|
152
|
+
json-schema (4.0.0)
|
153
|
+
addressable (>= 2.8)
|
151
154
|
language_server-protocol (3.17.0.3)
|
152
155
|
lint_roller (1.0.0)
|
153
156
|
loofah (2.21.1)
|
@@ -219,7 +222,7 @@ GEM
|
|
219
222
|
zeitwerk (~> 2.5)
|
220
223
|
rainbow (3.1.1)
|
221
224
|
rake (13.0.6)
|
222
|
-
rb_sys (0.9.
|
225
|
+
rb_sys (0.9.79)
|
223
226
|
rdiscount (2.2.7)
|
224
227
|
regexp_parser (2.8.0)
|
225
228
|
replicate-ruby (0.2.2)
|
@@ -313,13 +316,13 @@ PLATFORMS
|
|
313
316
|
x86_64-linux
|
314
317
|
|
315
318
|
DEPENDENCIES
|
316
|
-
ai21 (~> 0.2.
|
319
|
+
ai21 (~> 0.2.1)
|
317
320
|
chroma-db (~> 0.3.0)
|
318
|
-
cohere-ruby (~> 0.9.
|
321
|
+
cohere-ruby (~> 0.9.5)
|
319
322
|
docx (~> 0.8.0)
|
320
323
|
dotenv-rails (~> 2.7.6)
|
321
324
|
eqn (~> 1.6.5)
|
322
|
-
google_palm_api (~> 0.1.
|
325
|
+
google_palm_api (~> 0.1.2)
|
323
326
|
google_search_results (~> 2.0.0)
|
324
327
|
hnswlib (~> 0.8.1)
|
325
328
|
hugging-face (~> 0.3.4)
|
data/README.md
CHANGED
@@ -41,7 +41,7 @@ require "langchain"
|
|
41
41
|
| [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
42
42
|
| [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
43
43
|
| [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
44
|
-
| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP |
|
44
|
+
| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
45
45
|
|
46
46
|
### Using Vector Search Databases 🔍
|
47
47
|
|
@@ -155,13 +155,13 @@ replicate = Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
|
|
155
155
|
```
|
156
156
|
|
157
157
|
#### Google PaLM (Pathways Language Model)
|
158
|
-
Add `"google_palm_api", "~> 0.1.
|
158
|
+
Add `"google_palm_api", "~> 0.1.2"` to your Gemfile.
|
159
159
|
```ruby
|
160
160
|
google_palm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
|
161
161
|
```
|
162
162
|
|
163
163
|
#### AI21
|
164
|
-
Add `gem "ai21", "~> 0.2.
|
164
|
+
Add `gem "ai21", "~> 0.2.1"` to your Gemfile.
|
165
165
|
```ruby
|
166
166
|
ai21 = Langchain::LLM::AI21.new(api_key: ENV["AI21_API_KEY"])
|
167
167
|
```
|
@@ -261,7 +261,7 @@ prompt.input_variables #=> ["adjective", "content"]
|
|
261
261
|
### Using Agents 🤖
|
262
262
|
Agents are semi-autonomous bots that can respond to user questions and use the Tools available to them to provide informed replies. They break down problems into a series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer, it responds with it.
|
263
263
|
|
264
|
-
####
|
264
|
+
#### ReAct Agent
|
265
265
|
|
266
266
|
Add `gem "ruby-openai"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
|
267
267
|
|
@@ -271,7 +271,7 @@ calculator = Langchain::Tool::Calculator.new
|
|
271
271
|
|
272
272
|
openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
273
273
|
|
274
|
-
agent = Langchain::Agent::ChainOfThoughtAgent.new(
|
274
|
+
agent = Langchain::Agent::ReActAgent.new(
|
275
275
|
llm: openai,
|
276
276
|
tools: [search_tool, calculator]
|
277
277
|
)
|
@@ -0,0 +1,104 @@
|
|
1
|
+
require "langchain"
|
2
|
+
|
3
|
+
# Generate a prompt that directs the LLM to provide a JSON response that adheres to a specific JSON schema.
|
4
|
+
json_schema = {
|
5
|
+
type: "object",
|
6
|
+
properties: {
|
7
|
+
name: {
|
8
|
+
type: "string",
|
9
|
+
description: "Persons name"
|
10
|
+
},
|
11
|
+
age: {
|
12
|
+
type: "number",
|
13
|
+
description: "Persons age"
|
14
|
+
},
|
15
|
+
interests: {
|
16
|
+
type: "array",
|
17
|
+
items: {
|
18
|
+
type: "object",
|
19
|
+
properties: {
|
20
|
+
interest: {
|
21
|
+
type: "string",
|
22
|
+
description: "A topic of interest"
|
23
|
+
},
|
24
|
+
levelOfInterest: {
|
25
|
+
type: "number",
|
26
|
+
description: "A value between 0 and 100 of how interested the person is in this interest"
|
27
|
+
}
|
28
|
+
},
|
29
|
+
required: ["interest", "levelOfInterest"],
|
30
|
+
additionalProperties: false
|
31
|
+
},
|
32
|
+
minItems: 1,
|
33
|
+
maxItems: 3,
|
34
|
+
description: "A list of the person's interests"
|
35
|
+
}
|
36
|
+
},
|
37
|
+
required: ["name", "age", "interests"],
|
38
|
+
additionalProperties: false
|
39
|
+
}
|
40
|
+
parser = Langchain::OutputParsers::StructuredOutputParser.from_json_schema(json_schema)
|
41
|
+
prompt = Langchain::Prompt::PromptTemplate.new(template: "Generate details of a fictional character.\n{format_instructions}\nCharacter description: {description}", input_variables: ["description", "format_instructions"])
|
42
|
+
prompt.format(description: "Korean chemistry student", format_instructions: parser.get_format_instructions)
|
43
|
+
# Generate details of a fictional character.
|
44
|
+
# You must format your output as a JSON value that adheres to a given "JSON Schema" instance.
|
45
|
+
|
46
|
+
# "JSON Schema" is a declarative language that allows you to annotate and validate JSON documents.
|
47
|
+
|
48
|
+
# For example, the example "JSON Schema" instance {"properties": {"foo": {"description": "a list of test words", "type": "array", "items": {"type": "string"}}, "required": ["foo"]}
|
49
|
+
# would match an object with one required property, "foo". The "type" property specifies "foo" must be an "array", and the "description" property semantically describes it as "a list of test words". The items within "foo" must be strings.
|
50
|
+
# Thus, the object {"foo": ["bar", "baz"]} is a well-formatted instance of this example "JSON Schema". The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.
|
51
|
+
|
52
|
+
# Your output will be parsed and type-checked according to the provided schema instance, so make sure all fields in your output match the schema exactly and there are no trailing commas!
|
53
|
+
|
54
|
+
# Here is the JSON Schema instance your output must adhere to. Include the enclosing markdown codeblock:
|
55
|
+
# ```json
|
56
|
+
# {"type":"object","properties":{"name":{"type":"string","description":"Persons name"},"age":{"type":"number","description":"Persons age"},"interests":{"type":"array","items":{"type":"object","properties":{"interest":{"type":"string","description":"A topic of interest"},"levelOfInterest":{"type":"number","description":"A value between 0 and 100 of how interested the person is in this interest"},"required":["interest","levelOfInterest"],"additionalProperties":false},"minItems":1,"maxItems":3,"description":"A list of the person's interests"},"required":["name","age","interests"],"additionalProperties":false}
|
57
|
+
# ```
|
58
|
+
|
59
|
+
# Character description: Korean chemistry student
|
60
|
+
|
61
|
+
# LLM example response:
|
62
|
+
llm_example_response = <<~RESPONSE
|
63
|
+
Here is your character:
|
64
|
+
```json
|
65
|
+
{
|
66
|
+
"name": "Kim Ji-hyun",
|
67
|
+
"age": 22,
|
68
|
+
"interests": [
|
69
|
+
{
|
70
|
+
"interest": "Organic Chemistry",
|
71
|
+
"levelOfInterest": 85
|
72
|
+
},
|
73
|
+
{
|
74
|
+
"interest": "Biochemistry",
|
75
|
+
"levelOfInterest": 70
|
76
|
+
},
|
77
|
+
{
|
78
|
+
"interest": "Analytical Chemistry",
|
79
|
+
"levelOfInterest": 60
|
80
|
+
}
|
81
|
+
]
|
82
|
+
}
|
83
|
+
```
|
84
|
+
RESPONSE
|
85
|
+
|
86
|
+
parser.parse(llm_example_response)
|
87
|
+
# {
|
88
|
+
# "name" => "Kim Ji-hyun",
|
89
|
+
# "age" => 22,
|
90
|
+
# "interests" => [
|
91
|
+
# {
|
92
|
+
# "interest" => "Organic Chemistry",
|
93
|
+
# "levelOfInterest" => 85
|
94
|
+
# },
|
95
|
+
# {
|
96
|
+
# "interest" => "Biochemistry",
|
97
|
+
# "levelOfInterest" => 70
|
98
|
+
# },
|
99
|
+
# {
|
100
|
+
# "interest" => "Analytical Chemistry",
|
101
|
+
# "levelOfInterest" => 60
|
102
|
+
# }
|
103
|
+
# ]
|
104
|
+
# }
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain
|
4
|
+
module ActiveRecord
|
5
|
+
# This module adds the following functionality to your ActiveRecord models:
|
6
|
+
# * `vectorsearch` class method to set the vector search provider
|
7
|
+
# * `similarity_search` class method to search for similar texts
|
8
|
+
# * `upsert_to_vectorsearch` instance method to upsert the record to the vector search provider
|
9
|
+
#
|
10
|
+
# Usage:
|
11
|
+
# class Recipe < ActiveRecord::Base
|
12
|
+
# vectorsearch provider: Langchain::Vectorsearch::Weaviate.new(
|
13
|
+
# api_key: ENV["WEAVIATE_API_KEY"],
|
14
|
+
# url: ENV["WEAVIATE_URL"],
|
15
|
+
# index_name: "Recipes",
|
16
|
+
# llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
17
|
+
# )
|
18
|
+
#
|
19
|
+
# after_save :upsert_to_vectorsearch
|
20
|
+
#
|
21
|
+
# # Overwriting how the model is serialized before it's indexed
|
22
|
+
# def as_vector
|
23
|
+
# [
|
24
|
+
# "Title: #{title}",
|
25
|
+
# "Description: #{description}",
|
26
|
+
# ...
|
27
|
+
# ]
|
28
|
+
# .compact
|
29
|
+
# .join("\n")
|
30
|
+
# end
|
31
|
+
# end
|
32
|
+
#
|
33
|
+
# Create the default schema
|
34
|
+
# Recipe.class_variable_get(:@@provider).create_default_schema
|
35
|
+
# Query the vector search provider
|
36
|
+
# Recipe.similarity_search("carnivore dish")
|
37
|
+
# Delete the default schema to start over
|
38
|
+
# Recipe.class_variable_get(:@@provider).client.schema.delete class_name: "Recipes"
|
39
|
+
#
|
40
|
+
module Hooks
|
41
|
+
def self.included(base)
|
42
|
+
base.extend ClassMethods
|
43
|
+
end
|
44
|
+
|
45
|
+
# Index the text to the vector search provider
|
46
|
+
# You'd typically call this method in an ActiveRecord callback
|
47
|
+
#
|
48
|
+
# @return [Boolean] true
|
49
|
+
# @raise [Error] Indexing to vector search DB failed
|
50
|
+
def upsert_to_vectorsearch
|
51
|
+
if previously_new_record?
|
52
|
+
self.class.class_variable_get(:@@provider).add_texts(
|
53
|
+
texts: [as_vector],
|
54
|
+
ids: [id]
|
55
|
+
)
|
56
|
+
else
|
57
|
+
self.class.class_variable_get(:@@provider).update_texts(
|
58
|
+
texts: [as_vector],
|
59
|
+
ids: [id]
|
60
|
+
)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
# Used to serialize the DB record to an indexable vector text
|
65
|
+
# Overwrite this method in your model to customize
|
66
|
+
#
|
67
|
+
# @return [String] the text representation of the model
|
68
|
+
def as_vector
|
69
|
+
to_json
|
70
|
+
end
|
71
|
+
|
72
|
+
module ClassMethods
|
73
|
+
# Set the vector search provider
|
74
|
+
#
|
75
|
+
# @param provider [Object] The `Langchain::Vectorsearch::*` instance
|
76
|
+
def vectorsearch(provider:)
|
77
|
+
class_variable_set(:@@provider, provider)
|
78
|
+
end
|
79
|
+
|
80
|
+
# Search for similar texts
|
81
|
+
#
|
82
|
+
# @param query [String] The query to search for
|
83
|
+
# @param k [Integer] The number of results to return
|
84
|
+
# @return [ActiveRecord::Relation] The ActiveRecord relation
|
85
|
+
def similarity_search(query, k: 1)
|
86
|
+
records = class_variable_get(:@@provider).similarity_search(
|
87
|
+
query: query,
|
88
|
+
k: k
|
89
|
+
)
|
90
|
+
ids = records.map { |record| record.dig("__id") }
|
91
|
+
where(id: ids)
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
data/lib/langchain/agent/base.rb
CHANGED
@@ -6,7 +6,7 @@ module Langchain::Agent
|
|
6
6
|
# Agents are semi-autonomous bots that can respond to user questions and use the Tools available to them to provide informed replies. They break down problems into a series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer, it responds with it.
|
7
7
|
#
|
8
8
|
# Available:
|
9
|
-
# - {Langchain::Agent::
|
9
|
+
# - {Langchain::Agent::ReActAgent}
|
10
10
|
#
|
11
11
|
# @abstract
|
12
12
|
class Base
|
@@ -1,11 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Langchain::Agent
|
4
|
-
# =
|
4
|
+
# = ReAct Agent
|
5
5
|
#
|
6
6
|
# llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]) # or your choice of Langchain::LLM::Base implementation
|
7
7
|
#
|
8
|
-
# agent = Langchain::Agent::
|
8
|
+
# agent = Langchain::Agent::ReActAgent.new(
|
9
9
|
# llm: llm,
|
10
10
|
# tools: ["google_search", "calculator", "wikipedia"]
|
11
11
|
# )
|
@@ -15,7 +15,7 @@ module Langchain::Agent
|
|
15
15
|
#
|
16
16
|
# agent.run(question: "How many full soccer fields would be needed to cover the distance between NYC and DC in a straight line?")
|
17
17
|
# #=> "Approximately 2,945 soccer fields would be needed to cover the distance between NYC and DC in a straight line."
|
18
|
-
class ChainOfThoughtAgent < Base
|
18
|
+
class ReActAgent < Base
|
19
19
|
attr_reader :llm, :tools, :max_iterations
|
20
20
|
|
21
21
|
# Initializes the Agent
|
@@ -23,7 +23,7 @@ module Langchain::Agent
|
|
23
23
|
# @param llm [Object] The LLM client to use
|
24
24
|
# @param tools [Array] The tools to use
|
25
25
|
# @param max_iterations [Integer] The maximum number of iterations to run
|
26
|
-
# @return [
|
26
|
+
# @return [ReActAgent] The Agent::ReActAgent instance
|
27
27
|
def initialize(llm:, tools: [], max_iterations: 10)
|
28
28
|
Langchain::Tool::Base.validate_tools!(tools: tools)
|
29
29
|
|
@@ -117,7 +117,7 @@ module Langchain::Agent
|
|
117
117
|
# @return [PromptTemplate] PromptTemplate instance
|
118
118
|
def prompt_template
|
119
119
|
@template ||= Langchain::Prompt.load_from_path(
|
120
|
-
file_path: Langchain.root.join("langchain/agent/
|
120
|
+
file_path: Langchain.root.join("langchain/agent/react_agent/react_agent_prompt.yaml")
|
121
121
|
)
|
122
122
|
end
|
123
123
|
|
data/lib/langchain/data.rb
CHANGED
@@ -1,18 +1,26 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module Langchain
|
4
|
+
# Abstraction for data loaded by a {Langchain::Loader}
|
4
5
|
class Data
|
6
|
+
# URL or Path of the data source
|
7
|
+
# @return [String]
|
5
8
|
attr_reader :source
|
6
9
|
|
10
|
+
# @param data [String] data that was loaded
|
11
|
+
# @option options [String] :source URL or Path of the data source
|
7
12
|
def initialize(data, options = {})
|
8
13
|
@source = options[:source]
|
9
14
|
@data = data
|
10
15
|
end
|
11
16
|
|
17
|
+
# @return [String]
|
12
18
|
def value
|
13
19
|
@data
|
14
20
|
end
|
15
21
|
|
22
|
+
# @param opts [Hash] options passed to the chunker
|
23
|
+
# @return [Array<String>]
|
16
24
|
def chunks(opts = {})
|
17
25
|
Langchain::Chunker::Text.new(@data, **opts).chunks
|
18
26
|
end
|
data/lib/langchain/llm/ai21.rb
CHANGED
@@ -5,7 +5,7 @@ module Langchain::LLM
|
|
5
5
|
# Wrapper around AI21 Studio APIs.
|
6
6
|
#
|
7
7
|
# Gem requirements:
|
8
|
-
# gem "ai21", "~> 0.2.
|
8
|
+
# gem "ai21", "~> 0.2.1"
|
9
9
|
#
|
10
10
|
# Usage:
|
11
11
|
# ai21 = Langchain::LLM::AI21.new(api_key:)
|
@@ -13,9 +13,11 @@ module Langchain::LLM
|
|
13
13
|
class AI21 < Base
|
14
14
|
DEFAULTS = {
|
15
15
|
temperature: 0.0,
|
16
|
-
model: "j2-
|
16
|
+
model: "j2-ultra"
|
17
17
|
}.freeze
|
18
18
|
|
19
|
+
LENGTH_VALIDATOR = Langchain::Utils::TokenLength::AI21Validator
|
20
|
+
|
19
21
|
def initialize(api_key:, default_options: {})
|
20
22
|
depends_on "ai21"
|
21
23
|
require "ai21"
|
@@ -34,6 +36,8 @@ module Langchain::LLM
|
|
34
36
|
def complete(prompt:, **params)
|
35
37
|
parameters = complete_parameters params
|
36
38
|
|
39
|
+
parameters[:maxTokens] = LENGTH_VALIDATOR.validate_max_tokens!(prompt, parameters[:model], client)
|
40
|
+
|
37
41
|
response = client.complete(prompt, parameters)
|
38
42
|
response.dig(:completions, 0, :data, :text)
|
39
43
|
end
|
data/lib/langchain/llm/cohere.rb
CHANGED
@@ -13,9 +13,10 @@ module Langchain::LLM
|
|
13
13
|
class Cohere < Base
|
14
14
|
DEFAULTS = {
|
15
15
|
temperature: 0.0,
|
16
|
-
completion_model_name: "
|
16
|
+
completion_model_name: "command",
|
17
17
|
embeddings_model_name: "small",
|
18
|
-
dimension: 1024
|
18
|
+
dimension: 1024,
|
19
|
+
truncate: "START"
|
19
20
|
}.freeze
|
20
21
|
|
21
22
|
def initialize(api_key:, default_options: {})
|
@@ -51,7 +52,8 @@ module Langchain::LLM
|
|
51
52
|
default_params = {
|
52
53
|
prompt: prompt,
|
53
54
|
temperature: @defaults[:temperature],
|
54
|
-
model: @defaults[:completion_model_name]
|
55
|
+
model: @defaults[:completion_model_name],
|
56
|
+
truncate: @defaults[:truncate]
|
55
57
|
}
|
56
58
|
|
57
59
|
if params[:stop_sequences]
|
@@ -60,6 +62,8 @@ module Langchain::LLM
|
|
60
62
|
|
61
63
|
default_params.merge!(params)
|
62
64
|
|
65
|
+
default_params[:max_tokens] = Langchain::Utils::TokenLength::CohereValidator.validate_max_tokens!(prompt, default_params[:model], client)
|
66
|
+
|
63
67
|
response = client.generate(**default_params)
|
64
68
|
response.dig("generations").first.dig("text")
|
65
69
|
end
|
@@ -5,21 +5,12 @@ module Langchain::LLM
|
|
5
5
|
# Wrapper around the Google PaLM (Pathways Language Model) APIs: https://ai.google/build/machine-learning/
|
6
6
|
#
|
7
7
|
# Gem requirements:
|
8
|
-
# gem "google_palm_api", "~> 0.1.
|
8
|
+
# gem "google_palm_api", "~> 0.1.2"
|
9
9
|
#
|
10
10
|
# Usage:
|
11
11
|
# google_palm = Langchain::LLM::GooglePalm.new(api_key: "YOUR_API_KEY")
|
12
12
|
#
|
13
13
|
class GooglePalm < Base
|
14
|
-
#
|
15
|
-
# Wrapper around the Google PaLM (Pathways Language Model) APIs.
|
16
|
-
#
|
17
|
-
# Gem requirements: gem "google_palm_api", "~> 0.1.1"
|
18
|
-
#
|
19
|
-
# Usage:
|
20
|
-
# google_palm = Langchain::LLM::GooglePalm.new(api_key: "YOUR_API_KEY")
|
21
|
-
#
|
22
|
-
|
23
14
|
DEFAULTS = {
|
24
15
|
temperature: 0.0,
|
25
16
|
dimension: 768, # This is what the `embedding-gecko-001` model generates
|
@@ -61,7 +52,7 @@ module Langchain::LLM
|
|
61
52
|
default_params = {
|
62
53
|
prompt: prompt,
|
63
54
|
temperature: @defaults[:temperature],
|
64
|
-
|
55
|
+
model: @defaults[:completion_model_name]
|
65
56
|
}
|
66
57
|
|
67
58
|
if params[:stop_sequences]
|
@@ -91,7 +82,7 @@ module Langchain::LLM
|
|
91
82
|
|
92
83
|
default_params = {
|
93
84
|
temperature: @defaults[:temperature],
|
94
|
-
|
85
|
+
model: @defaults[:chat_completion_model_name],
|
95
86
|
context: context,
|
96
87
|
messages: compose_chat_messages(prompt: prompt, messages: messages),
|
97
88
|
examples: compose_examples(examples)
|
data/lib/langchain/loader.rb
CHANGED
@@ -10,32 +10,64 @@ module Langchain
|
|
10
10
|
|
11
11
|
URI_REGEX = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
|
12
12
|
|
13
|
-
# Load data from a file or
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
13
|
+
# Load data from a file or URL. Shorthand for `Langchain::Loader.new(path).load`
|
14
|
+
#
|
15
|
+
# == Examples
|
16
|
+
#
|
17
|
+
# # load a URL
|
18
|
+
# data = Langchain::Loader.load("https://example.com/docs/README.md")
|
19
|
+
#
|
20
|
+
# # load a file
|
21
|
+
# data = Langchain::Loader.load("README.md")
|
22
|
+
#
|
23
|
+
# # Load data using a custom processor
|
24
|
+
# data = Langchain::Loader.load("README.md") do |raw_data, options|
|
25
|
+
# # your processing code goes here
|
26
|
+
# # return data at the end here
|
27
|
+
# end
|
28
|
+
#
|
29
|
+
# @param path [String | Pathname] path to file or URL
|
30
|
+
# @param options [Hash] options passed to the processor class used to process the data
|
31
|
+
# @return [Data] data loaded from path
|
17
32
|
def self.load(path, options = {}, &block)
|
18
33
|
new(path, options).load(&block)
|
19
34
|
end
|
20
35
|
|
21
36
|
# Initialize Langchain::Loader
|
22
|
-
# @param path [String | Pathname] path to file or
|
37
|
+
# @param path [String | Pathname] path to file or URL
|
38
|
+
# @param options [Hash] options passed to the processor class used to process the data
|
23
39
|
# @return [Langchain::Loader] loader instance
|
24
40
|
def initialize(path, options = {})
|
25
41
|
@options = options
|
26
42
|
@path = path
|
27
43
|
end
|
28
44
|
|
29
|
-
#
|
30
|
-
#
|
45
|
+
# Is the path a URL?
|
46
|
+
#
|
47
|
+
# @return [Boolean] true if path is URL
|
31
48
|
def url?
|
32
49
|
return false if @path.is_a?(Pathname)
|
33
50
|
|
34
51
|
!!(@path =~ URI_REGEX)
|
35
52
|
end
|
36
53
|
|
37
|
-
# Load data from a file or
|
38
|
-
#
|
54
|
+
# Load data from a file or URL
|
55
|
+
#
|
56
|
+
# loader = Langchain::Loader.new("README.md")
|
57
|
+
# # Load data using default processor for the file
|
58
|
+
# loader.load
|
59
|
+
#
|
60
|
+
# # Load data using a custom processor
|
61
|
+
# loader.load do |raw_data, options|
|
62
|
+
# # your processing code goes here
|
63
|
+
# # return data at the end here
|
64
|
+
# end
|
65
|
+
#
|
66
|
+
# @yield [String, Hash] handle parsing raw output into string directly
|
67
|
+
# @yieldparam [String] raw_data from the loaded URL or file
|
68
|
+
# @yieldreturn [String] parsed data, as a String
|
69
|
+
#
|
70
|
+
# @return [Data] data that was loaded
|
39
71
|
def load(&block)
|
40
72
|
@raw_data = url? ? load_from_url : load_from_path
|
41
73
|
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain::OutputParsers
|
4
|
+
# Structured output parsers from the LLM.
|
5
|
+
#
|
6
|
+
# @abstract
|
7
|
+
class Base
|
8
|
+
#
|
9
|
+
# Parse the output of an LLM call.
|
10
|
+
#
|
11
|
+
# @param text - LLM output to parse.
|
12
|
+
# @return Parsed output.
|
13
|
+
#
|
14
|
+
def parse(text:)
|
15
|
+
raise NotImplementedError
|
16
|
+
end
|
17
|
+
|
18
|
+
#
|
19
|
+
# Return a string describing the format of the output.
|
20
|
+
#
|
21
|
+
# @return [String] Format instructions.
|
22
|
+
# @param options - Options for formatting instructions.
|
23
|
+
# @example
|
24
|
+
# ```json
|
25
|
+
# {
|
26
|
+
# "foo": "bar"
|
27
|
+
# }
|
28
|
+
# ```
|
29
|
+
#
|
30
|
+
def get_format_instructions
|
31
|
+
raise NotImplementedError
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
class OutputParserException < StandardError
|
36
|
+
def initialize(message, text)
|
37
|
+
@message = message
|
38
|
+
@text = text
|
39
|
+
end
|
40
|
+
|
41
|
+
def to_s
|
42
|
+
"#{@message}\nText: #{@text}"
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
require "json-schema"
|
5
|
+
|
6
|
+
module Langchain::OutputParsers
|
7
|
+
# = Structured Output Parser
|
8
|
+
#
|
9
|
+
class StructuredOutputParser < Base
|
10
|
+
attr_reader :schema
|
11
|
+
|
12
|
+
#
|
13
|
+
# Initializes a new instance of the class.
|
14
|
+
#
|
15
|
+
# @param schema [JSON::Schema] The json schema
|
16
|
+
#
|
17
|
+
def initialize(schema:)
|
18
|
+
@schema = validate_schema!(schema)
|
19
|
+
end
|
20
|
+
|
21
|
+
def to_h
|
22
|
+
{
|
23
|
+
_type: "StructuredOutputParser",
|
24
|
+
schema: schema.to_json
|
25
|
+
}
|
26
|
+
end
|
27
|
+
|
28
|
+
#
|
29
|
+
# Creates a new instance of the class using the given JSON::Schema.
|
30
|
+
#
|
31
|
+
# @param schema [JSON::Schema] The JSON::Schema to use
|
32
|
+
#
|
33
|
+
# @return [Object] A new instance of the class
|
34
|
+
#
|
35
|
+
def self.from_json_schema(schema)
|
36
|
+
new(schema: schema)
|
37
|
+
end
|
38
|
+
|
39
|
+
#
|
40
|
+
# Returns a string containing instructions for how the output of a language model should be formatted
|
41
|
+
# according to the @schema.
|
42
|
+
#
|
43
|
+
# @return [String] Instructions for how the output of a language model should be formatted
|
44
|
+
# according to the @schema.
|
45
|
+
#
|
46
|
+
def get_format_instructions
|
47
|
+
<<~INSTRUCTIONS
|
48
|
+
You must format your output as a JSON value that adheres to a given "JSON Schema" instance.
|
49
|
+
|
50
|
+
"JSON Schema" is a declarative language that allows you to annotate and validate JSON documents.
|
51
|
+
|
52
|
+
For example, the example "JSON Schema" instance {"properties": {"foo": {"description": "a list of test words", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}}
|
53
|
+
would match an object with one required property, "foo". The "type" property specifies "foo" must be an "array", and the "description" property semantically describes it as "a list of test words". The items within "foo" must be strings.
|
54
|
+
Thus, the object {"foo": ["bar", "baz"]} is a well-formatted instance of this example "JSON Schema". The object {"properties": {"foo": ["bar", "baz"]}}} is not well-formatted.
|
55
|
+
|
56
|
+
Your output will be parsed and type-checked according to the provided schema instance, so make sure all fields in your output match the schema exactly and there are no trailing commas!
|
57
|
+
|
58
|
+
Here is the JSON Schema instance your output must adhere to. Include the enclosing markdown codeblock:
|
59
|
+
```json
|
60
|
+
#{schema.to_json}
|
61
|
+
```
|
62
|
+
INSTRUCTIONS
|
63
|
+
end
|
64
|
+
|
65
|
+
#
|
66
|
+
# Parse the output of an LLM call extracting an object that abides by the @schema
|
67
|
+
#
|
68
|
+
# @param text [String] Text output from the LLM call
|
69
|
+
#
|
70
|
+
# @return [Object] object that abides by the @schema
|
71
|
+
#
|
72
|
+
def parse(text)
|
73
|
+
json = text.include?("```") ? text.strip.split(/```(?:json)?/)[1] : text.strip
|
74
|
+
parsed = JSON.parse(json)
|
75
|
+
JSON::Validator.validate!(schema, parsed)
|
76
|
+
parsed
|
77
|
+
rescue => e
|
78
|
+
raise OutputParserException.new("Failed to parse. Text: \"#{text}\". Error: #{e}", text)
|
79
|
+
end
|
80
|
+
|
81
|
+
private
|
82
|
+
|
83
|
+
def validate_schema!(schema)
|
84
|
+
errors = JSON::Validator.fully_validate_schema(schema)
|
85
|
+
unless errors.empty?
|
86
|
+
raise ArgumentError, "Invalid schema: \n#{errors.join("\n")}"
|
87
|
+
end
|
88
|
+
schema
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
data/lib/langchain/tool/base.rb
CHANGED
@@ -27,7 +27,7 @@ module Langchain::Tool
|
|
27
27
|
#
|
28
28
|
# 3. Pass the tools when Agent is instantiated.
|
29
29
|
#
|
30
|
-
# agent = Langchain::Agent::
|
30
|
+
# agent = Langchain::Agent::ReActAgent.new(
|
31
31
|
# llm: :openai, # or :cohere, :hugging_face, :google_palm or :replicate
|
32
32
|
# llm_api_key: ENV["OPENAI_API_KEY"],
|
33
33
|
# tools: ["google_search", "calculator", "wikipedia"]
|
@@ -14,7 +14,7 @@ module Langchain::Tool
|
|
14
14
|
The input to this tool should be valid SQL.
|
15
15
|
DESC
|
16
16
|
|
17
|
-
attr_reader :db, :requested_tables, :
|
17
|
+
attr_reader :db, :requested_tables, :excluded_tables
|
18
18
|
|
19
19
|
#
|
20
20
|
# Establish a database connection
|
@@ -25,16 +25,15 @@ module Langchain::Tool
|
|
25
25
|
|
26
26
|
# @return [Database] Database object
|
27
27
|
#
|
28
|
-
def initialize(connection_string:, tables: [],
|
28
|
+
def initialize(connection_string:, tables: [], exclude_tables: [])
|
29
29
|
depends_on "sequel"
|
30
30
|
require "sequel"
|
31
|
-
require "sequel/extensions/schema_dumper"
|
32
31
|
|
33
32
|
raise StandardError, "connection_string parameter cannot be blank" if connection_string.empty?
|
34
33
|
|
35
34
|
@db = Sequel.connect(connection_string)
|
36
35
|
@requested_tables = tables
|
37
|
-
@
|
36
|
+
@excluded_tables = exclude_tables
|
38
37
|
end
|
39
38
|
|
40
39
|
#
|
@@ -46,20 +45,31 @@ module Langchain::Tool
|
|
46
45
|
Langchain.logger.info("Dumping schema tables and keys", for: self.class)
|
47
46
|
schema = ""
|
48
47
|
db.tables.each do |table|
|
49
|
-
next if
|
48
|
+
next if excluded_tables.include?(table)
|
50
49
|
next unless requested_tables.empty? || requested_tables.include?(table)
|
51
50
|
|
51
|
+
primary_key_columns = []
|
52
|
+
primary_key_column_count = db.schema(table).count { |column| column[1][:primary_key] == true }
|
53
|
+
|
52
54
|
schema << "CREATE TABLE #{table}(\n"
|
53
55
|
db.schema(table).each do |column|
|
54
56
|
schema << "#{column[0]} #{column[1][:type]}"
|
55
|
-
|
56
|
-
|
57
|
-
|
57
|
+
if column[1][:primary_key] == true
|
58
|
+
schema << " PRIMARY KEY" if primary_key_column_count == 1
|
59
|
+
else
|
60
|
+
primary_key_columns << column[0]
|
61
|
+
end
|
62
|
+
schema << ",\n" unless column == db.schema(table).last && primary_key_column_count == 1
|
63
|
+
end
|
64
|
+
if primary_key_column_count > 1
|
65
|
+
schema << "PRIMARY KEY (#{primary_key_columns.join(",")})"
|
58
66
|
end
|
59
|
-
schema << ");\n"
|
60
67
|
db.foreign_key_list(table).each do |fk|
|
61
|
-
schema << "
|
68
|
+
schema << ",\n" if fk == db.foreign_key_list(table).first
|
69
|
+
schema << "FOREIGN KEY (#{fk[:columns][0]}) REFERENCES #{fk[:table]}(#{fk[:key][0]})"
|
70
|
+
schema << ",\n" unless fk == db.foreign_key_list(table).last
|
62
71
|
end
|
72
|
+
schema << ");\n"
|
63
73
|
end
|
64
74
|
schema
|
65
75
|
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain
  module Utils
    module TokenLength
      #
      # This class is meant to validate the length of the text passed in to AI21's API.
      # It is used to validate the token length before the API call is made
      #
      class AI21Validator < BaseValidator
        # Token limit per supported AI21 model name.
        TOKEN_LIMITS = {
          "j2-ultra" => 8192,
          "j2-mid" => 8192,
          "j2-light" => 8192
        }.freeze

        #
        # Calculate token length for a given text and model name
        #
        # The count is obtained by asking the client to tokenize the text;
        # +model_name+ is accepted for interface parity but is not used here.
        #
        # @param text [String] The text to calculate the token length for
        # @param model_name [String] The model name to validate against
        # @param client [Object] Client responding to +tokenize(text)+ and returning
        #   a structure exposing a +:tokens+ collection
        # @return [Integer] The token length of the text
        #
        def self.token_length(text, model_name, client)
          res = client.tokenize(text)
          res.dig(:tokens).length
        end

        #
        # Look up the token limit for a model
        #
        # @param model_name [String] The model name to look up
        # @return [Integer, nil] The limit from TOKEN_LIMITS, or nil when the model is not listed
        #
        def self.token_limit(model_name)
          TOKEN_LIMITS[model_name]
        end
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain
  module Utils
    module TokenLength
      #
      # This class is meant to validate the length of the text passed in to Cohere's API.
      # It is used to validate the token length before the API call is made
      #
      class CohereValidator < BaseValidator
        # Token limit per supported Cohere model name.
        TOKEN_LIMITS = {
          # Source:
          # https://docs.cohere.com/docs/models
          "command-light" => 4096,
          "command" => 4096,
          "base-light" => 2048,
          "base" => 2048,
          "embed-english-light-v2.0" => 512,
          "embed-english-v2.0" => 512,
          "embed-multilingual-v2.0" => 256,
          "summarize-medium" => 2048,
          "summarize-xlarge" => 2048
        }.freeze

        #
        # Calculate token length for a given text and model name
        #
        # The count is obtained by asking the client to tokenize the text;
        # +model_name+ is accepted for interface parity but is not used here.
        #
        # @param text [String] The text to calculate the token length for
        # @param model_name [String] The model name to validate against
        # @param client [Object] Client responding to +tokenize(text:)+ and returning
        #   a structure exposing a "tokens" collection
        # @return [Integer] The token length of the text
        #
        def self.token_length(text, model_name, client)
          res = client.tokenize(text: text)
          res["tokens"].length
        end

        #
        # Look up the token limit for a model
        #
        # @param model_name [String] The model name to look up
        # @return [Integer, nil] The limit from TOKEN_LIMITS, or nil when the model is not listed
        #
        def self.token_limit(model_name)
          TOKEN_LIMITS[model_name]
        end
      end
    end
  end
end
|
@@ -14,7 +14,7 @@ module Langchain::Vectorsearch
|
|
14
14
|
# Initialize the Weaviate adapter
|
15
15
|
# @param url [String] The URL of the Weaviate instance
|
16
16
|
# @param api_key [String] The API key to use
|
17
|
-
# @param index_name [String] The name of the index to use
|
17
|
+
# @param index_name [String] The capitalized name of the index to use
|
18
18
|
# @param llm [Object] The LLM client to use
|
19
19
|
def initialize(url:, api_key:, index_name:, llm:)
|
20
20
|
depends_on "weaviate-ruby"
|
@@ -24,6 +24,9 @@ module Langchain::Vectorsearch
|
|
24
24
|
url: url,
|
25
25
|
api_key: api_key
|
26
26
|
)
|
27
|
+
|
28
|
+
# Weaviate requires the class name to be Capitalized: https://weaviate.io/developers/weaviate/configuration/schema-configuration#create-a-class
|
29
|
+
# TODO: Capitalize index_name
|
27
30
|
@index_name = index_name
|
28
31
|
|
29
32
|
super(llm: llm)
|
@@ -32,31 +35,51 @@ module Langchain::Vectorsearch
|
|
32
35
|
# Add a list of texts to the index
|
33
36
|
# @param texts [Array] The list of texts to add
|
34
37
|
# @return [Hash] The response from the server
|
35
|
-
def add_texts(texts:)
|
36
|
-
objects = Array(texts).map do |text|
|
37
|
-
{
|
38
|
-
class: index_name,
|
39
|
-
properties: {content: text},
|
40
|
-
vector: llm.embed(text: text)
|
41
|
-
}
|
42
|
-
end
|
43
|
-
|
38
|
+
def add_texts(texts:, ids:)
|
44
39
|
client.objects.batch_create(
|
45
|
-
objects:
|
40
|
+
objects: weaviate_objects(texts, ids)
|
46
41
|
)
|
47
42
|
end
|
48
43
|
|
44
|
+
# Update a list of texts in the index
#
# Each text is matched to an existing object through its "__id" property,
# then the object's content and vector are replaced.
#
# @param texts [Array<String>, String] The text(s) to update
# @param ids [Array, Object] The "__id" value(s) of the objects to update, positionally matching +texts+
# @return [Array] One response from +client.objects.update+ per text
def update_texts(texts:, ids:)
  # Coerce both arguments the same way so a single text/id pair works too.
  text_list = Array(texts)
  id_list = Array(ids)

  # Retrieve the UUIDs of the objects to update (all lookups happen before any update).
  # NOTE(review): the id is interpolated verbatim into the filter string; an id
  # containing a double quote would presumably break the filter -- confirm.
  uuids = Array.new(text_list.length) do |i|
    record = client.query.get(
      class_name: index_name,
      fields: "_additional { id }",
      where: "{ path: [\"__id\"], operator: Equal, valueString: \"#{id_list[i]}\" }"
    )
    # Raises NoMethodError when no object matches the given id (record[0] is nil).
    record[0].dig("_additional", "id")
  end

  # Update the objects
  text_list.map.with_index do |text, i|
    client.objects.update(
      class_name: index_name,
      id: uuids[i],
      properties: {
        __id: id_list[i].to_s,
        content: text
      },
      vector: llm.embed(text: text)
    )
  end
end
|
73
|
+
|
49
74
|
# Create default schema
|
50
75
|
def create_default_schema
|
51
76
|
client.schema.create(
|
52
77
|
class_name: index_name,
|
53
78
|
vectorizer: "none",
|
54
79
|
properties: [
|
55
|
-
#
|
56
|
-
{
|
57
|
-
|
58
|
-
name: "content"
|
59
|
-
}
|
80
|
+
# __id to be used as a pointer to the original document
|
81
|
+
{dataType: ["string"], name: "__id"}, # '_id' is a reserved property name (single underscore)
|
82
|
+
{dataType: ["text"], name: "content"}
|
60
83
|
]
|
61
84
|
)
|
62
85
|
end
|
@@ -82,7 +105,7 @@ module Langchain::Vectorsearch
|
|
82
105
|
class_name: index_name,
|
83
106
|
near_vector: near_vector,
|
84
107
|
limit: k.to_s,
|
85
|
-
fields: "content _additional { id }"
|
108
|
+
fields: "__id content _additional { id }"
|
86
109
|
)
|
87
110
|
end
|
88
111
|
|
@@ -101,5 +124,24 @@ module Langchain::Vectorsearch
|
|
101
124
|
|
102
125
|
llm.chat(prompt: prompt)
|
103
126
|
end
|
127
|
+
|
128
|
+
private
|
129
|
+
|
130
|
+
# Build the batch payload for a list of texts.
#
# @param texts [Array<String>, String] The text(s) to convert
# @param ids [Array, Object] The id(s) positionally matching +texts+
# @return [Array] One result of +weaviate_object+ per text
def weaviate_objects(texts, ids)
  # Coerce ids like texts so a scalar id is not indexed into (Integer#[] is a bit reference).
  id_list = Array(ids)
  Array(texts).each_with_index.map { |text, i| weaviate_object(text, id_list[i]) }
end
|
135
|
+
|
136
|
+
# Build the payload for a single object in the index.
#
# @param text [String] The text stored in the "content" property and passed to +llm.embed+
# @param id [Object] The id stored (stringified) in the "__id" property
# @return [Hash] Hash with +:class+, +:properties+ and +:vector+ keys
def weaviate_object(text, id)
  {
    class: index_name,
    properties: {__id: id.to_s, content: text},
    vector: llm.embed(text: text)
  }
end
|
104
146
|
end
|
105
147
|
end
|
data/lib/langchain/version.rb
CHANGED
data/lib/langchain.rb
CHANGED
@@ -74,7 +74,7 @@ module Langchain
|
|
74
74
|
|
75
75
|
module Agent
|
76
76
|
autoload :Base, "langchain/agent/base"
|
77
|
-
autoload :
|
77
|
+
autoload :ReActAgent, "langchain/agent/react_agent/react_agent.rb"
|
78
78
|
autoload :SQLQueryAgent, "langchain/agent/sql_query_agent/sql_query_agent.rb"
|
79
79
|
end
|
80
80
|
|
@@ -108,9 +108,11 @@ module Langchain
|
|
108
108
|
module Utils
|
109
109
|
module TokenLength
|
110
110
|
autoload :BaseValidator, "langchain/utils/token_length/base_validator"
|
111
|
-
autoload :
|
112
|
-
autoload :
|
111
|
+
autoload :AI21Validator, "langchain/utils/token_length/ai21_validator"
|
112
|
+
autoload :CohereValidator, "langchain/utils/token_length/cohere_validator"
|
113
113
|
autoload :GooglePalmValidator, "langchain/utils/token_length/google_palm_validator"
|
114
|
+
autoload :OpenAIValidator, "langchain/utils/token_length/openai_validator"
|
115
|
+
autoload :TokenLimitExceeded, "langchain/utils/token_length/token_limit_exceeded"
|
114
116
|
end
|
115
117
|
end
|
116
118
|
|
@@ -143,7 +145,18 @@ module Langchain
|
|
143
145
|
autoload :FewShotPromptTemplate, "langchain/prompt/few_shot_prompt_template"
|
144
146
|
end
|
145
147
|
|
148
|
+
module ActiveRecord
|
149
|
+
autoload :Hooks, "langchain/active_record/hooks"
|
150
|
+
end
|
151
|
+
|
152
|
+
module OutputParsers
|
153
|
+
autoload :Base, "langchain/output_parsers/base"
|
154
|
+
autoload :StructuredOutputParser, "langchain/output_parsers/structured"
|
155
|
+
end
|
156
|
+
|
146
157
|
module Errors
|
147
158
|
class BaseError < StandardError; end
|
148
159
|
end
|
149
160
|
end
|
161
|
+
|
162
|
+
require "langchain/railtie" if defined?(Rails)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-06-
|
11
|
+
date: 2023-06-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: baran
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: 0.0.5
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: json-schema
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 4.0.0
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - "~>"
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: 4.0.0
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: dotenv-rails
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -114,14 +128,14 @@ dependencies:
|
|
114
128
|
requirements:
|
115
129
|
- - "~>"
|
116
130
|
- !ruby/object:Gem::Version
|
117
|
-
version: 0.2.
|
131
|
+
version: 0.2.1
|
118
132
|
type: :development
|
119
133
|
prerelease: false
|
120
134
|
version_requirements: !ruby/object:Gem::Requirement
|
121
135
|
requirements:
|
122
136
|
- - "~>"
|
123
137
|
- !ruby/object:Gem::Version
|
124
|
-
version: 0.2.
|
138
|
+
version: 0.2.1
|
125
139
|
- !ruby/object:Gem::Dependency
|
126
140
|
name: chroma-db
|
127
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -142,14 +156,14 @@ dependencies:
|
|
142
156
|
requirements:
|
143
157
|
- - "~>"
|
144
158
|
- !ruby/object:Gem::Version
|
145
|
-
version: 0.9.
|
159
|
+
version: 0.9.5
|
146
160
|
type: :development
|
147
161
|
prerelease: false
|
148
162
|
version_requirements: !ruby/object:Gem::Requirement
|
149
163
|
requirements:
|
150
164
|
- - "~>"
|
151
165
|
- !ruby/object:Gem::Version
|
152
|
-
version: 0.9.
|
166
|
+
version: 0.9.5
|
153
167
|
- !ruby/object:Gem::Dependency
|
154
168
|
name: docx
|
155
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -184,14 +198,14 @@ dependencies:
|
|
184
198
|
requirements:
|
185
199
|
- - "~>"
|
186
200
|
- !ruby/object:Gem::Version
|
187
|
-
version: 0.1.
|
201
|
+
version: 0.1.2
|
188
202
|
type: :development
|
189
203
|
prerelease: false
|
190
204
|
version_requirements: !ruby/object:Gem::Requirement
|
191
205
|
requirements:
|
192
206
|
- - "~>"
|
193
207
|
- !ruby/object:Gem::Version
|
194
|
-
version: 0.1.
|
208
|
+
version: 0.1.2
|
195
209
|
- !ruby/object:Gem::Dependency
|
196
210
|
name: google_search_results
|
197
211
|
requirement: !ruby/object:Gem::Requirement
|
@@ -462,15 +476,17 @@ files:
|
|
462
476
|
- Rakefile
|
463
477
|
- examples/create_and_manage_few_shot_prompt_templates.rb
|
464
478
|
- examples/create_and_manage_prompt_templates.rb
|
479
|
+
- examples/create_and_manage_prompt_templates_using_structured_output_parser.rb
|
465
480
|
- examples/pdf_store_and_query_with_chroma.rb
|
466
481
|
- examples/store_and_query_with_pinecone.rb
|
467
482
|
- examples/store_and_query_with_qdrant.rb
|
468
483
|
- examples/store_and_query_with_weaviate.rb
|
469
484
|
- lefthook.yml
|
470
485
|
- lib/langchain.rb
|
486
|
+
- lib/langchain/active_record/hooks.rb
|
471
487
|
- lib/langchain/agent/base.rb
|
472
|
-
- lib/langchain/agent/
|
473
|
-
- lib/langchain/agent/
|
488
|
+
- lib/langchain/agent/react_agent/react_agent.rb
|
489
|
+
- lib/langchain/agent/react_agent/react_agent_prompt.yaml
|
474
490
|
- lib/langchain/agent/sql_query_agent/sql_query_agent.rb
|
475
491
|
- lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml
|
476
492
|
- lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml
|
@@ -489,6 +505,8 @@ files:
|
|
489
505
|
- lib/langchain/llm/prompts/summarize_template.yaml
|
490
506
|
- lib/langchain/llm/replicate.rb
|
491
507
|
- lib/langchain/loader.rb
|
508
|
+
- lib/langchain/output_parsers/base.rb
|
509
|
+
- lib/langchain/output_parsers/structured.rb
|
492
510
|
- lib/langchain/processors/base.rb
|
493
511
|
- lib/langchain/processors/csv.rb
|
494
512
|
- lib/langchain/processors/docx.rb
|
@@ -502,6 +520,7 @@ files:
|
|
502
520
|
- lib/langchain/prompt/few_shot_prompt_template.rb
|
503
521
|
- lib/langchain/prompt/loading.rb
|
504
522
|
- lib/langchain/prompt/prompt_template.rb
|
523
|
+
- lib/langchain/railtie.rb
|
505
524
|
- lib/langchain/tool/base.rb
|
506
525
|
- lib/langchain/tool/calculator.rb
|
507
526
|
- lib/langchain/tool/database.rb
|
@@ -509,7 +528,9 @@ files:
|
|
509
528
|
- lib/langchain/tool/ruby_code_interpreter.rb
|
510
529
|
- lib/langchain/tool/weather.rb
|
511
530
|
- lib/langchain/tool/wikipedia.rb
|
531
|
+
- lib/langchain/utils/token_length/ai21_validator.rb
|
512
532
|
- lib/langchain/utils/token_length/base_validator.rb
|
533
|
+
- lib/langchain/utils/token_length/cohere_validator.rb
|
513
534
|
- lib/langchain/utils/token_length/google_palm_validator.rb
|
514
535
|
- lib/langchain/utils/token_length/openai_validator.rb
|
515
536
|
- lib/langchain/utils/token_length/token_limit_exceeded.rb
|