langchainrb 0.5.4 → 0.5.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.env.example +1 -0
- data/CHANGELOG.md +13 -0
- data/Gemfile.lock +10 -1
- data/README.md +8 -5
- data/examples/store_and_query_with_pinecone.rb +5 -4
- data/lib/langchain/agent/base.rb +5 -0
- data/lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb +22 -10
- data/lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.yaml +26 -0
- data/lib/langchain/agent/sql_query_agent/sql_query_agent.rb +7 -7
- data/lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml +11 -0
- data/lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml +21 -0
- data/lib/langchain/chunker/base.rb +15 -0
- data/lib/langchain/chunker/text.rb +38 -0
- data/lib/langchain/contextual_logger.rb +60 -0
- data/lib/langchain/conversation.rb +97 -0
- data/lib/langchain/data.rb +4 -0
- data/lib/langchain/llm/google_palm.rb +3 -2
- data/lib/langchain/llm/openai.rb +16 -6
- data/lib/langchain/llm/prompts/summarize_template.yaml +9 -0
- data/lib/langchain/llm/replicate.rb +1 -1
- data/lib/langchain/prompt/base.rb +2 -2
- data/lib/langchain/tool/base.rb +9 -3
- data/lib/langchain/tool/calculator.rb +2 -2
- data/lib/langchain/tool/database.rb +3 -3
- data/lib/langchain/tool/{serp_api.rb → google_search.rb} +9 -9
- data/lib/langchain/tool/ruby_code_interpreter.rb +1 -1
- data/lib/langchain/tool/weather.rb +67 -0
- data/lib/langchain/tool/wikipedia.rb +1 -1
- data/lib/langchain/utils/token_length/base_validator.rb +38 -0
- data/lib/langchain/utils/token_length/google_palm_validator.rb +9 -29
- data/lib/langchain/utils/token_length/openai_validator.rb +10 -27
- data/lib/langchain/utils/token_length/token_limit_exceeded.rb +17 -0
- data/lib/langchain/vectorsearch/base.rb +6 -0
- data/lib/langchain/vectorsearch/hnswlib.rb +2 -2
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +29 -12
- metadata +46 -11
- data/lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json +0 -10
- data/lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json +0 -10
- data/lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json +0 -10
- data/lib/langchain/chat.rb +0 -50
- data/lib/langchain/llm/prompts/summarize_template.json +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 045a900e70f73ac0c969ea0e7cb0130d12219ad869583a55d5f5857ceccac618
|
4
|
+
data.tar.gz: f6202d34280eeda69026add6cb0bcadb1625da58ed729e1b4ca02c2cfdbd76b4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b5cd3983b8a7389baace3befd24751d1c2974b94da29868fac6bfcd048681d2b6cc603d13f791d7ca4bffbc18b9278704c3db188112b51f1c71ac528c6c04f70
|
7
|
+
data.tar.gz: c061c1a877bc94488177ef79a46ed558540ba664a001a463a95fbe7f1f5f50c8895f359ab06fce26bb7dedf8cd246713b96e553fdfa55ca5b68c78f124e87a2a
|
data/.env.example
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,18 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.5.6] - 2023-06-18
|
4
|
+
- If used with OpenAI, Langchain::Conversation responses can now be streamed.
|
5
|
+
- Improved logging
|
6
|
+
- Langchain::Tool::SerpApi has been renamed to Langchain::Tool::GoogleSearch
|
7
|
+
- JSON prompt templates have been converted to YAML
|
8
|
+
- Langchain::Chunker::Text is introduced to provide simple text chunking functionality
|
9
|
+
- Misc fixes and improvements
|
10
|
+
|
11
|
+
## [0.5.5] - 2023-06-12
|
12
|
+
- [BREAKING] Rename `Langchain::Chat` to `Langchain::Conversation`
|
13
|
+
- 🛠️ Tools
|
14
|
+
- Introducing `Langchain::Tool::Weather`, a tool that calls Open Weather API to retrieve the current weather
|
15
|
+
|
3
16
|
## [0.5.4] - 2023-06-10
|
4
17
|
- 🔍 Vectorsearch
|
5
18
|
- Introducing support for HNSWlib
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,8 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
langchainrb (0.5.4)
|
4
|
+
langchainrb (0.5.6)
|
5
|
+
baran (~> 0.1.6)
|
5
6
|
colorize (~> 0.8.1)
|
6
7
|
tiktoken_ruby (~> 0.0.5)
|
7
8
|
|
@@ -32,6 +33,7 @@ GEM
|
|
32
33
|
afm (0.2.2)
|
33
34
|
ai21 (0.2.0)
|
34
35
|
ast (2.4.2)
|
36
|
+
baran (0.1.6)
|
35
37
|
builder (3.2.4)
|
36
38
|
byebug (11.1.3)
|
37
39
|
childprocess (4.1.0)
|
@@ -135,6 +137,7 @@ GEM
|
|
135
137
|
activesupport (>= 3.0)
|
136
138
|
graphql
|
137
139
|
hashery (2.1.2)
|
140
|
+
hashie (5.0.0)
|
138
141
|
hnswlib (0.8.1)
|
139
142
|
httparty (0.21.0)
|
140
143
|
mini_mime (>= 1.0.0)
|
@@ -167,6 +170,11 @@ GEM
|
|
167
170
|
racc (~> 1.4)
|
168
171
|
nokogiri (1.14.3-x86_64-linux)
|
169
172
|
racc (~> 1.4)
|
173
|
+
open-weather-ruby-client (0.3.0)
|
174
|
+
activesupport
|
175
|
+
faraday (>= 1.0.0)
|
176
|
+
faraday_middleware
|
177
|
+
hashie
|
170
178
|
parallel (1.23.0)
|
171
179
|
parser (3.2.2.1)
|
172
180
|
ast (~> 2.4.1)
|
@@ -318,6 +326,7 @@ DEPENDENCIES
|
|
318
326
|
langchainrb!
|
319
327
|
milvus (~> 0.9.0)
|
320
328
|
nokogiri (~> 1.13)
|
329
|
+
open-weather-ruby-client (~> 0.3.0)
|
321
330
|
pdf-reader (~> 1.4)
|
322
331
|
pg (~> 1.5)
|
323
332
|
pgvector (~> 0.2)
|
data/README.md
CHANGED
@@ -10,6 +10,8 @@
|
|
10
10
|
[![Gem Version](https://badge.fury.io/rb/langchainrb.svg)](https://badge.fury.io/rb/langchainrb)
|
11
11
|
[![Docs](http://img.shields.io/badge/yard-docs-blue.svg)](http://rubydoc.info/gems/langchainrb)
|
12
12
|
[![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/andreibondarev/langchainrb/blob/main/LICENSE.txt)
|
13
|
+
[![](https://dcbadge.vercel.app/api/server/WWqjwxMv?compact=true&style=flat)](https://discord.gg/WWqjwxMv)
|
14
|
+
|
13
15
|
|
14
16
|
Langchain.rb is a library that's an abstraction layer on top of many emergent AI, ML and other DS tools. The goal is to abstract complexity and difficult concepts to make building AI/ML-supercharged applications approachable for traditional software engineers.
|
15
17
|
|
@@ -264,7 +266,7 @@ Agents are semi-autonomous bots that can respond to user questions and use avail
|
|
264
266
|
Add `gem "ruby-openai"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
|
265
267
|
|
266
268
|
```ruby
|
267
|
-
search_tool = Langchain::Tool::
|
269
|
+
search_tool = Langchain::Tool::GoogleSearch.new(api_key: ENV["SERPAPI_API_KEY"])
|
268
270
|
calculator = Langchain::Tool::Calculator.new
|
269
271
|
|
270
272
|
openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
@@ -275,7 +277,7 @@ agent = Langchain::Agent::ChainOfThoughtAgent.new(
|
|
275
277
|
)
|
276
278
|
|
277
279
|
agent.tools
|
278
|
-
# => ["
|
280
|
+
# => ["google_search", "calculator"]
|
279
281
|
```
|
280
282
|
```ruby
|
281
283
|
agent.run(question: "How many full soccer fields would be needed to cover the distance between NYC and DC in a straight line?")
|
@@ -308,7 +310,8 @@ agent.run(question: "How many users have a name with length greater than 5 in th
|
|
308
310
|
| "calculator" | Useful for getting the result of a math expression | | `gem "eqn", "~> 1.6.5"` |
|
309
311
|
| "database" | Useful for querying a SQL database | | `gem "sequel", "~> 5.68.0"` |
|
310
312
|
| "ruby_code_interpreter" | Interprets Ruby expressions | | `gem "safe_ruby", "~> 1.0.4"` |
|
311
|
-
| "
|
313
|
+
| "google_search" | A wrapper around Google Search | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key) | `gem "google_search_results", "~> 2.0.0"` |
|
314
|
+
| "weather" | Calls Open Weather API to retrieve the current weather | `ENV["OPEN_WEATHER_API_KEY"]` (https://home.openweathermap.org/api_keys) | `gem "open-weather-ruby-client", "~> 0.3.0"` |
|
312
315
|
| "wikipedia" | Calls Wikipedia API to retrieve the summary | | `gem "wikipedia-client", "~> 1.17.0"` |
|
313
316
|
|
314
317
|
#### Loaders 🚚
|
@@ -363,8 +366,8 @@ Langchain.logger.level = :info
|
|
363
366
|
4. `bin/console` to load the gem in a REPL session. Feel free to add your own instances of LLMs, Tools, Agents, etc. and experiment with them.
|
364
367
|
5. Optionally, install lefthook git hooks for pre-commit to auto lint: `gem install lefthook && lefthook install -f`
|
365
368
|
|
366
|
-
##
|
367
|
-
Join us in the [
|
369
|
+
## Discord
|
370
|
+
Join us in the [Langchain.rb](https://discord.gg/hXutDWGDd) Discord server.
|
368
371
|
|
369
372
|
## Core Contributors
|
370
373
|
[<img style="border-radius:50%" alt="Andrei Bondarev" src="https://avatars.githubusercontent.com/u/541665?v=4" width="80" height="80" class="avatar">](https://github.com/andreibondarev)
|
@@ -3,7 +3,7 @@ require "langchain"
|
|
3
3
|
# gem install pinecone
|
4
4
|
# or add `gem "pinecone"` to your Gemfile
|
5
5
|
|
6
|
-
# Instantiate the
|
6
|
+
# Instantiate the Pinecone client
|
7
7
|
pinecone = Langchain::Vectorsearch::Pinecone.new(
|
8
8
|
environment: ENV["PINECONE_ENVIRONMENT"],
|
9
9
|
api_key: ENV["PINECONE_API_KEY"],
|
@@ -12,6 +12,7 @@ pinecone = Langchain::Vectorsearch::Pinecone.new(
|
|
12
12
|
)
|
13
13
|
|
14
14
|
# Create the default schema.
|
15
|
+
# If you are using the free Pinecone tier, ensure there is not an existing schema/index
|
15
16
|
pinecone.create_default_schema
|
16
17
|
|
17
18
|
# Set up an array of text strings
|
@@ -20,7 +21,7 @@ recipes = [
|
|
20
21
|
"Heat oven to 190C/fan 170C/gas 5. Heat 1 tbsp oil and the butter in a frying pan, then add the onion and fry for 5 mins until softened. Cool slightly. Tip the sausagemeat, lemon zest, breadcrumbs, apricots, chestnuts and thyme into a bowl. Add the onion and cranberries, and mix everything together with your hands, adding plenty of pepper and a little salt. Cut each chicken breast into three fillets lengthwise and season all over with salt and pepper. Heat the remaining oil in the frying pan, and fry the chicken fillets quickly until browned, about 6-8 mins. Roll out two-thirds of the pastry to line a 20-23cm springform or deep loose-based tart tin. Press in half the sausage mix and spread to level. Then add the chicken pieces in one layer and cover with the rest of the sausage. Press down lightly. Roll out the remaining pastry. Brush the edges of the pastry with beaten egg and cover with the pastry lid. Pinch the edges to seal, then trim. Brush the top of the pie with egg, then roll out the trimmings to make holly leaf shapes and berries. Decorate the pie and brush again with egg. Set the tin on a baking sheet and bake for 50-60 mins, then cool in the tin for 15 mins. Remove and leave to cool completely. Serve with a winter salad and pickles."
|
21
22
|
]
|
22
23
|
|
23
|
-
# Add data to the index.
|
24
|
+
# Add data to the index. Pinecone will use OpenAI to generate embeddings behind the scenes.
|
24
25
|
pinecone.add_texts(
|
25
26
|
texts: recipes
|
26
27
|
)
|
@@ -33,10 +34,10 @@ pinecone.similarity_search(
|
|
33
34
|
|
34
35
|
# Interact with your index through Q&A
|
35
36
|
pinecone.ask(
|
36
|
-
question: "What is
|
37
|
+
question: "What is a good recipe for chicken?"
|
37
38
|
)
|
38
39
|
|
39
|
-
# Generate
|
40
|
+
# Generate an embedding and search by it
|
40
41
|
openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
41
42
|
embedding = openai.embed(text: "veggie")
|
42
43
|
|
data/lib/langchain/agent/base.rb
CHANGED
@@ -7,28 +7,30 @@ module Langchain::Agent
|
|
7
7
|
#
|
8
8
|
# agent = Langchain::Agent::ChainOfThoughtAgent.new(
|
9
9
|
# llm: llm,
|
10
|
-
# tools: ["
|
10
|
+
# tools: ["google_search", "calculator", "wikipedia"]
|
11
11
|
# )
|
12
12
|
#
|
13
13
|
# agent.tools
|
14
|
-
# # => ["
|
14
|
+
# # => ["google_search", "calculator", "wikipedia"]
|
15
15
|
#
|
16
16
|
# agent.run(question: "How many full soccer fields would be needed to cover the distance between NYC and DC in a straight line?")
|
17
17
|
# #=> "Approximately 2,945 soccer fields would be needed to cover the distance between NYC and DC in a straight line."
|
18
18
|
class ChainOfThoughtAgent < Base
|
19
|
-
attr_reader :llm, :tools
|
19
|
+
attr_reader :llm, :tools, :max_iterations
|
20
20
|
|
21
21
|
# Initializes the Agent
|
22
22
|
#
|
23
23
|
# @param llm [Object] The LLM client to use
|
24
24
|
# @param tools [Array] The tools to use
|
25
|
+
# @param max_iterations [Integer] The maximum number of iterations to run
|
25
26
|
# @return [ChainOfThoughtAgent] The Agent::ChainOfThoughtAgent instance
|
26
|
-
def initialize(llm:, tools: [])
|
27
|
+
def initialize(llm:, tools: [], max_iterations: 10)
|
27
28
|
Langchain::Tool::Base.validate_tools!(tools: tools)
|
28
29
|
|
29
30
|
@tools = tools
|
30
31
|
|
31
32
|
@llm = llm
|
33
|
+
@max_iterations = max_iterations
|
32
34
|
end
|
33
35
|
|
34
36
|
# Validate tools when they're re-assigned
|
@@ -51,8 +53,9 @@ module Langchain::Agent
|
|
51
53
|
tools: tools
|
52
54
|
)
|
53
55
|
|
54
|
-
|
55
|
-
|
56
|
+
final_response = nil
|
57
|
+
max_iterations.times do
|
58
|
+
Langchain.logger.info("Sending the prompt to the #{llm.class} LLM", for: self.class)
|
56
59
|
|
57
60
|
response = llm.complete(prompt: prompt, stop_sequences: ["Observation:"])
|
58
61
|
|
@@ -68,7 +71,7 @@ module Langchain::Agent
|
|
68
71
|
|
69
72
|
# Find the Tool and call `execute` with action_input as the input
|
70
73
|
tool = tools.find { |tool| tool.tool_name == action.strip }
|
71
|
-
Langchain.logger.info("
|
74
|
+
Langchain.logger.info("Invoking \"#{tool.class}\" Tool with \"#{action_input}\"", for: self.class)
|
72
75
|
|
73
76
|
# Call `execute` with action_input as the input
|
74
77
|
result = tool.execute(input: action_input)
|
@@ -81,9 +84,12 @@ module Langchain::Agent
|
|
81
84
|
end
|
82
85
|
else
|
83
86
|
# Return the final answer
|
84
|
-
|
87
|
+
final_response = response.match(/Final Answer: (.*)/)&.send(:[], -1)
|
88
|
+
break
|
85
89
|
end
|
86
90
|
end
|
91
|
+
|
92
|
+
final_response || raise(MaxIterationsReachedError.new(max_iterations))
|
87
93
|
end
|
88
94
|
|
89
95
|
private
|
@@ -107,12 +113,18 @@ module Langchain::Agent
|
|
107
113
|
)
|
108
114
|
end
|
109
115
|
|
110
|
-
# Load the PromptTemplate from the
|
116
|
+
# Load the PromptTemplate from the YAML file
|
111
117
|
# @return [PromptTemplate] PromptTemplate instance
|
112
118
|
def prompt_template
|
113
119
|
@template ||= Langchain::Prompt.load_from_path(
|
114
|
-
file_path: Langchain.root.join("langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.
|
120
|
+
file_path: Langchain.root.join("langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.yaml")
|
115
121
|
)
|
116
122
|
end
|
123
|
+
|
124
|
+
class MaxIterationsReachedError < Langchain::Errors::BaseError
|
125
|
+
def initialize(max_iterations)
|
126
|
+
super("Agent stopped after #{max_iterations} iterations")
|
127
|
+
end
|
128
|
+
end
|
117
129
|
end
|
118
130
|
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
_type: prompt
|
2
|
+
template: |
|
3
|
+
Today is {date} and you can use tools to get new information. Answer the following questions as best you can using the following tools:
|
4
|
+
|
5
|
+
{tools}
|
6
|
+
|
7
|
+
Use the following format:
|
8
|
+
|
9
|
+
Question: the input question you must answer
|
10
|
+
Thought: you should always think about what to do
|
11
|
+
Action: the action to take, should be one of {tool_names}
|
12
|
+
Action Input: the input to the action
|
13
|
+
Observation: the result of the action
|
14
|
+
... (this Thought/Action/Action Input/Observation can repeat N times)
|
15
|
+
Thought: I now know the final answer
|
16
|
+
Final Answer: the final answer to the original input question
|
17
|
+
|
18
|
+
Begin!
|
19
|
+
|
20
|
+
Question: {question}
|
21
|
+
Thought:
|
22
|
+
input_variables:
|
23
|
+
- date
|
24
|
+
- question
|
25
|
+
- tools
|
26
|
+
- tool_names
|
@@ -26,15 +26,15 @@ module Langchain::Agent
|
|
26
26
|
prompt = create_prompt_for_sql(question: question)
|
27
27
|
|
28
28
|
# Get the SQL string to execute
|
29
|
-
Langchain.logger.info("
|
29
|
+
Langchain.logger.info("Passing the inital prompt to the #{llm.class} LLM", for: self.class)
|
30
30
|
sql_string = llm.complete(prompt: prompt)
|
31
31
|
|
32
32
|
# Execute the SQL string and collect the results
|
33
|
-
Langchain.logger.info("
|
33
|
+
Langchain.logger.info("Passing the SQL to the Database: #{sql_string}", for: self.class)
|
34
34
|
results = db.execute(input: sql_string)
|
35
35
|
|
36
36
|
# Pass the results and get the LLM to synthesize the answer to the question
|
37
|
-
Langchain.logger.info("
|
37
|
+
Langchain.logger.info("Passing the synthesize prompt to the #{llm.class} LLM with results: #{results}", for: self.class)
|
38
38
|
prompt2 = create_prompt_for_answer(question: question, sql_query: sql_string, results: results)
|
39
39
|
llm.complete(prompt: prompt2)
|
40
40
|
end
|
@@ -52,11 +52,11 @@ module Langchain::Agent
|
|
52
52
|
)
|
53
53
|
end
|
54
54
|
|
55
|
-
# Load the PromptTemplate from the
|
55
|
+
# Load the PromptTemplate from the YAML file
|
56
56
|
# @return [PromptTemplate] PromptTemplate instance
|
57
57
|
def prompt_template_sql
|
58
58
|
Langchain::Prompt.load_from_path(
|
59
|
-
file_path: Langchain.root.join("langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.
|
59
|
+
file_path: Langchain.root.join("langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml")
|
60
60
|
)
|
61
61
|
end
|
62
62
|
|
@@ -71,11 +71,11 @@ module Langchain::Agent
|
|
71
71
|
)
|
72
72
|
end
|
73
73
|
|
74
|
-
# Load the PromptTemplate from the
|
74
|
+
# Load the PromptTemplate from the YAML file
|
75
75
|
# @return [PromptTemplate] PromptTemplate instance
|
76
76
|
def prompt_template_answer
|
77
77
|
Langchain::Prompt.load_from_path(
|
78
|
-
file_path: Langchain.root.join("langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.
|
78
|
+
file_path: Langchain.root.join("langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml")
|
79
79
|
)
|
80
80
|
end
|
81
81
|
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
_type: prompt
|
2
|
+
template: |
|
3
|
+
Given an input question and results of a SQL query, look at the results and return the answer. Use the following format:
|
4
|
+
Question: {question}
|
5
|
+
The SQL query: {sql_query}
|
6
|
+
Result of the SQLQuery: {results}
|
7
|
+
Final answer: Final answer here
|
8
|
+
input_variables:
|
9
|
+
- question
|
10
|
+
- sql_query
|
11
|
+
- results
|
@@ -0,0 +1,21 @@
|
|
1
|
+
_type: prompt
|
2
|
+
template: |
|
3
|
+
Given an input question, create a syntactically correct {dialect} query to run, then return the query in valid SQL.
|
4
|
+
Never query for all the columns from a specific table, only ask for a the few relevant columns given the question.
|
5
|
+
Pay attention to use only the column names that you can see in the schema description.
|
6
|
+
Be careful to not query for columns that do not exist.
|
7
|
+
Pay attention to which column is in which table.
|
8
|
+
Also, qualify column names with the table name when needed.
|
9
|
+
|
10
|
+
Only use the tables listed below.
|
11
|
+
{schema}
|
12
|
+
|
13
|
+
Use the following format:
|
14
|
+
|
15
|
+
Question: {question}
|
16
|
+
|
17
|
+
SQLQuery:
|
18
|
+
input_variables:
|
19
|
+
- dialect
|
20
|
+
- schema
|
21
|
+
- question
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain
|
4
|
+
module Chunker
|
5
|
+
# = Chunkers
|
6
|
+
# Chunkers are used to split documents into smaller chunks before indexing into vector search databases.
|
7
|
+
# Otherwise large documents, when retrieved and passed to LLMs, may hit the context window limits.
|
8
|
+
#
|
9
|
+
# == Available chunkers
|
10
|
+
#
|
11
|
+
# - {Langchain::Chunker::Text}
|
12
|
+
class Base
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "baran"
|
4
|
+
|
5
|
+
module Langchain
|
6
|
+
module Chunker
|
7
|
+
#
|
8
|
+
# Simple text chunker
|
9
|
+
#
|
10
|
+
# Usage:
|
11
|
+
# Langchain::Chunker::Text.new(text).chunks
|
12
|
+
#
|
13
|
+
class Text < Base
|
14
|
+
attr_reader :text, :chunk_size, :chunk_overlap, :separator
|
15
|
+
|
16
|
+
# @param [String] text
|
17
|
+
# @param [Integer] chunk_size
|
18
|
+
# @param [Integer] chunk_overlap
|
19
|
+
# @param [String] separator
|
20
|
+
def initialize(text, chunk_size: 1000, chunk_overlap: 200, separator: "\n\n")
|
21
|
+
@text = text
|
22
|
+
@chunk_size = chunk_size
|
23
|
+
@chunk_overlap = chunk_overlap
|
24
|
+
@separator = separator
|
25
|
+
end
|
26
|
+
|
27
|
+
# @return [Array<String>]
|
28
|
+
def chunks
|
29
|
+
splitter = Baran::CharacterTextSplitter.new(
|
30
|
+
chunk_size: chunk_size,
|
31
|
+
chunk_overlap: chunk_overlap,
|
32
|
+
separator: separator
|
33
|
+
)
|
34
|
+
splitter.chunks(text)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain
|
4
|
+
class ContextualLogger
|
5
|
+
MESSAGE_COLOR_OPTIONS = {
|
6
|
+
debug: {
|
7
|
+
color: :white
|
8
|
+
},
|
9
|
+
error: {
|
10
|
+
color: :red
|
11
|
+
},
|
12
|
+
fatal: {
|
13
|
+
color: :red,
|
14
|
+
background: :white,
|
15
|
+
mode: :bold
|
16
|
+
},
|
17
|
+
unknown: {
|
18
|
+
color: :white
|
19
|
+
},
|
20
|
+
info: {
|
21
|
+
color: :white
|
22
|
+
},
|
23
|
+
warn: {
|
24
|
+
color: :yellow,
|
25
|
+
mode: :bold
|
26
|
+
}
|
27
|
+
}
|
28
|
+
|
29
|
+
def initialize(logger)
|
30
|
+
@logger = logger
|
31
|
+
@levels = Logger::Severity.constants.map(&:downcase)
|
32
|
+
end
|
33
|
+
|
34
|
+
def respond_to_missing?(method, include_private = false)
|
35
|
+
@logger.respond_to?(method, include_private)
|
36
|
+
end
|
37
|
+
|
38
|
+
def method_missing(method, *args, **kwargs, &block)
|
39
|
+
return @logger.send(method, *args, **kwargs, &block) unless @levels.include?(method)
|
40
|
+
|
41
|
+
for_class = kwargs.delete(:for)
|
42
|
+
for_class_name = for_class&.name
|
43
|
+
|
44
|
+
log_line_parts = []
|
45
|
+
log_line_parts << "[LangChain.rb]".colorize(color: :yellow)
|
46
|
+
log_line_parts << if for_class.respond_to?(:logger_options)
|
47
|
+
"[#{for_class_name}]".colorize(for_class.logger_options) + ":"
|
48
|
+
elsif for_class_name
|
49
|
+
"[#{for_class_name}]:"
|
50
|
+
end
|
51
|
+
log_line_parts << args.first.colorize(MESSAGE_COLOR_OPTIONS[method])
|
52
|
+
log_line = log_line_parts.compact.join(" ")
|
53
|
+
|
54
|
+
@logger.send(
|
55
|
+
method,
|
56
|
+
log_line
|
57
|
+
)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Langchain
|
4
|
+
#
|
5
|
+
# A high-level API for running a conversation with an LLM.
|
6
|
+
# Currently supports: OpenAI and Google PaLM LLMs.
|
7
|
+
#
|
8
|
+
# Usage:
|
9
|
+
# llm = Langchain::LLM::OpenAI.new(api_key: "YOUR_API_KEY")
|
10
|
+
# chat = Langchain::Conversation.new(llm: llm)
|
11
|
+
# chat.set_context("You are a chatbot from the future")
|
12
|
+
# chat.message("Tell me about future technologies")
|
13
|
+
#
|
14
|
+
# To stream the chat response:
|
15
|
+
# chat = Langchain::Conversation.new(llm: llm) do |chunk|
|
16
|
+
# print(chunk)
|
17
|
+
# end
|
18
|
+
#
|
19
|
+
class Conversation
|
20
|
+
attr_reader :context, :examples, :messages
|
21
|
+
|
22
|
+
# The least number of tokens we want to be under the limit by
|
23
|
+
TOKEN_LEEWAY = 20
|
24
|
+
|
25
|
+
# Initialize Conversation with an LLM
|
26
|
+
#
|
27
|
+
# @param llm [Object] The LLM to use for the conversation
|
28
|
+
# @param options [Hash] Options to pass to the LLM, like temperature, top_k, etc.
|
29
|
+
# @return [Langchain::Conversation] The Langchain::Conversation instance
|
30
|
+
def initialize(llm:, **options, &block)
|
31
|
+
@llm = llm
|
32
|
+
@context = nil
|
33
|
+
@examples = []
|
34
|
+
@messages = options.delete(:messages) || []
|
35
|
+
@options = options
|
36
|
+
@block = block
|
37
|
+
end
|
38
|
+
|
39
|
+
# Set the context of the conversation. Usually used to set the model's persona.
|
40
|
+
# @param message [String] The context of the conversation
|
41
|
+
def set_context(message)
|
42
|
+
@context = message
|
43
|
+
end
|
44
|
+
|
45
|
+
# Add examples to the conversation. Used to give the model a sense of the conversation.
|
46
|
+
# @param examples [Array<Hash>] The examples to add to the conversation
|
47
|
+
def add_examples(examples)
|
48
|
+
@examples.concat examples
|
49
|
+
end
|
50
|
+
|
51
|
+
# Message the model with a prompt and return the response.
|
52
|
+
# @param message [String] The prompt to message the model with
|
53
|
+
# @return [String] The response from the model
|
54
|
+
def message(message)
|
55
|
+
append_user_message(message)
|
56
|
+
response = llm_response(message)
|
57
|
+
append_ai_message(response)
|
58
|
+
response
|
59
|
+
end
|
60
|
+
|
61
|
+
private
|
62
|
+
|
63
|
+
def llm_response(prompt)
|
64
|
+
@llm.chat(messages: @messages, context: @context, examples: @examples, **@options, &@block)
|
65
|
+
rescue Langchain::Utils::TokenLength::TokenLimitExceeded => exception
|
66
|
+
raise exception if @messages.size == 1
|
67
|
+
|
68
|
+
reduce_messages(exception.token_overflow)
|
69
|
+
retry
|
70
|
+
end
|
71
|
+
|
72
|
+
def reduce_messages(token_overflow)
|
73
|
+
@messages = @messages.drop_while do |message|
|
74
|
+
proceed = token_overflow > -TOKEN_LEEWAY
|
75
|
+
token_overflow -= token_length(message.to_json, model_name, llm: @llm)
|
76
|
+
|
77
|
+
proceed
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def append_ai_message(message)
|
82
|
+
@messages << {role: "ai", content: message}
|
83
|
+
end
|
84
|
+
|
85
|
+
def append_user_message(message)
|
86
|
+
@messages << {role: "user", content: message}
|
87
|
+
end
|
88
|
+
|
89
|
+
def model_name
|
90
|
+
@options[:model] || @llm.class::DEFAULTS[:chat_completion_model_name]
|
91
|
+
end
|
92
|
+
|
93
|
+
def token_length(content, model_name, options)
|
94
|
+
@llm.class::LENGTH_VALIDATOR.token_length(content, model_name, options)
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
data/lib/langchain/data.rb
CHANGED
@@ -24,6 +24,7 @@ module Langchain::LLM
|
|
24
24
|
temperature: 0.0,
|
25
25
|
dimension: 768 # This is what the `embedding-gecko-001` model generates
|
26
26
|
}.freeze
|
27
|
+
LENGTH_VALIDATOR = Langchain::Utils::TokenLength::GooglePalmValidator
|
27
28
|
|
28
29
|
def initialize(api_key:)
|
29
30
|
depends_on "google_palm_api"
|
@@ -90,7 +91,7 @@ module Langchain::LLM
|
|
90
91
|
examples: compose_examples(examples)
|
91
92
|
}
|
92
93
|
|
93
|
-
|
94
|
+
LENGTH_VALIDATOR.validate_max_tokens!(default_params[:messages], "chat-bison-001", llm: self)
|
94
95
|
|
95
96
|
if options[:stop_sequences]
|
96
97
|
default_params[:stop] = options.delete(:stop_sequences)
|
@@ -116,7 +117,7 @@ module Langchain::LLM
|
|
116
117
|
#
|
117
118
|
def summarize(text:)
|
118
119
|
prompt_template = Langchain::Prompt.load_from_path(
|
119
|
-
file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.
|
120
|
+
file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.yaml")
|
120
121
|
)
|
121
122
|
prompt = prompt_template.format(text: text)
|
122
123
|
|