langchainrb 0.5.4 → 0.5.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/.env.example +1 -0
  3. data/CHANGELOG.md +13 -0
  4. data/Gemfile.lock +10 -1
  5. data/README.md +8 -5
  6. data/examples/store_and_query_with_pinecone.rb +5 -4
  7. data/lib/langchain/agent/base.rb +5 -0
  8. data/lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb +22 -10
  9. data/lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.yaml +26 -0
  10. data/lib/langchain/agent/sql_query_agent/sql_query_agent.rb +7 -7
  11. data/lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml +11 -0
  12. data/lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml +21 -0
  13. data/lib/langchain/chunker/base.rb +15 -0
  14. data/lib/langchain/chunker/text.rb +38 -0
  15. data/lib/langchain/contextual_logger.rb +60 -0
  16. data/lib/langchain/conversation.rb +97 -0
  17. data/lib/langchain/data.rb +4 -0
  18. data/lib/langchain/llm/google_palm.rb +3 -2
  19. data/lib/langchain/llm/openai.rb +16 -6
  20. data/lib/langchain/llm/prompts/summarize_template.yaml +9 -0
  21. data/lib/langchain/llm/replicate.rb +1 -1
  22. data/lib/langchain/prompt/base.rb +2 -2
  23. data/lib/langchain/tool/base.rb +9 -3
  24. data/lib/langchain/tool/calculator.rb +2 -2
  25. data/lib/langchain/tool/database.rb +3 -3
  26. data/lib/langchain/tool/{serp_api.rb → google_search.rb} +9 -9
  27. data/lib/langchain/tool/ruby_code_interpreter.rb +1 -1
  28. data/lib/langchain/tool/weather.rb +67 -0
  29. data/lib/langchain/tool/wikipedia.rb +1 -1
  30. data/lib/langchain/utils/token_length/base_validator.rb +38 -0
  31. data/lib/langchain/utils/token_length/google_palm_validator.rb +9 -29
  32. data/lib/langchain/utils/token_length/openai_validator.rb +10 -27
  33. data/lib/langchain/utils/token_length/token_limit_exceeded.rb +17 -0
  34. data/lib/langchain/vectorsearch/base.rb +6 -0
  35. data/lib/langchain/vectorsearch/hnswlib.rb +2 -2
  36. data/lib/langchain/version.rb +1 -1
  37. data/lib/langchain.rb +29 -12
  38. metadata +46 -11
  39. data/lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json +0 -10
  40. data/lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json +0 -10
  41. data/lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json +0 -10
  42. data/lib/langchain/chat.rb +0 -50
  43. data/lib/langchain/llm/prompts/summarize_template.json +0 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 87647b8a7e2dc49359f3f6d655eda501dcac26ebdd14247ad6c583be8dc1a71c
4
- data.tar.gz: fb7b4321caa4ff026439158f5ccfc2ae9e7b515a69c35cba87385f2cb367fa85
3
+ metadata.gz: 045a900e70f73ac0c969ea0e7cb0130d12219ad869583a55d5f5857ceccac618
4
+ data.tar.gz: f6202d34280eeda69026add6cb0bcadb1625da58ed729e1b4ca02c2cfdbd76b4
5
5
  SHA512:
6
- metadata.gz: 4f80677c43c00e6d50e0494aa79cb7648b9f3878ed8d2a5f4f2dc90e308a3639589f8457a4615821b70b44c5a43ae4f26fcf00d7548684740e4c05dbcc165bf8
7
- data.tar.gz: 4722233dbed83d21f2dadff19a9b79a30d8fd208d6e30bd057f018060c602b7f00f0526ee0364823597806388e2a8da48e883a5e6fa77b31490199685644b4d2
6
+ metadata.gz: b5cd3983b8a7389baace3befd24751d1c2974b94da29868fac6bfcd048681d2b6cc603d13f791d7ca4bffbc18b9278704c3db188112b51f1c71ac528c6c04f70
7
+ data.tar.gz: c061c1a877bc94488177ef79a46ed558540ba664a001a463a95fbe7f1f5f50c8895f359ab06fce26bb7dedf8cd246713b96e553fdfa55ca5b68c78f124e87a2a
data/.env.example CHANGED
@@ -5,6 +5,7 @@ HUGGING_FACE_API_KEY=
5
5
  MILVUS_URL=
6
6
  OPENAI_API_KEY=
7
7
  GOOGLE_PALM_API_KEY=
8
+ OPEN_WEATHER_API_KEY=
8
9
  PINECONE_API_KEY=
9
10
  PINECONE_ENVIRONMENT=
10
11
  POSTGRES_URL=
data/CHANGELOG.md CHANGED
@@ -1,5 +1,18 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.5.6] - 2023-06-18
4
+ - If used with OpenAI, Langchain::Conversation responses can now be streamed.
5
+ - Improved logging
6
+ - Langchain::Tool::SerpApi has been renamed to Langchain::Tool::GoogleSearch
7
+ - JSON prompt templates have been converted to YAML
8
+ - Langchain::Chunker::Text is introduced to provide simple text chunking functionality
9
+ - Misc fixes and improvements
10
+
11
+ ## [0.5.5] - 2023-06-12
12
+ - [BREAKING] Rename `Langchain::Chat` to `Langchain::Conversation`
13
+ - 🛠️ Tools
14
+ - Introducing `Langchain::Tool::Weather`, a tool that calls Open Weather API to retrieve the current weather
15
+
3
16
  ## [0.5.4] - 2023-06-10
4
17
  - 🔍 Vectorsearch
5
18
  - Introducing support for HNSWlib
data/Gemfile.lock CHANGED
@@ -1,7 +1,8 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.5.4)
4
+ langchainrb (0.5.6)
5
+ baran (~> 0.1.6)
5
6
  colorize (~> 0.8.1)
6
7
  tiktoken_ruby (~> 0.0.5)
7
8
 
@@ -32,6 +33,7 @@ GEM
32
33
  afm (0.2.2)
33
34
  ai21 (0.2.0)
34
35
  ast (2.4.2)
36
+ baran (0.1.6)
35
37
  builder (3.2.4)
36
38
  byebug (11.1.3)
37
39
  childprocess (4.1.0)
@@ -135,6 +137,7 @@ GEM
135
137
  activesupport (>= 3.0)
136
138
  graphql
137
139
  hashery (2.1.2)
140
+ hashie (5.0.0)
138
141
  hnswlib (0.8.1)
139
142
  httparty (0.21.0)
140
143
  mini_mime (>= 1.0.0)
@@ -167,6 +170,11 @@ GEM
167
170
  racc (~> 1.4)
168
171
  nokogiri (1.14.3-x86_64-linux)
169
172
  racc (~> 1.4)
173
+ open-weather-ruby-client (0.3.0)
174
+ activesupport
175
+ faraday (>= 1.0.0)
176
+ faraday_middleware
177
+ hashie
170
178
  parallel (1.23.0)
171
179
  parser (3.2.2.1)
172
180
  ast (~> 2.4.1)
@@ -318,6 +326,7 @@ DEPENDENCIES
318
326
  langchainrb!
319
327
  milvus (~> 0.9.0)
320
328
  nokogiri (~> 1.13)
329
+ open-weather-ruby-client (~> 0.3.0)
321
330
  pdf-reader (~> 1.4)
322
331
  pg (~> 1.5)
323
332
  pgvector (~> 0.2)
data/README.md CHANGED
@@ -10,6 +10,8 @@
10
10
  [![Gem Version](https://badge.fury.io/rb/langchainrb.svg)](https://badge.fury.io/rb/langchainrb)
11
11
  [![Docs](http://img.shields.io/badge/yard-docs-blue.svg)](http://rubydoc.info/gems/langchainrb)
12
12
  [![License](https://img.shields.io/badge/license-MIT-green.svg)](https://github.com/andreibondarev/langchainrb/blob/main/LICENSE.txt)
13
+ [![](https://dcbadge.vercel.app/api/server/WWqjwxMv?compact=true&style=flat)](https://discord.gg/WWqjwxMv)
14
+
13
15
 
14
16
  Langchain.rb is a library that's an abstraction layer on top many emergent AI, ML and other DS tools. The goal is to abstract complexity and difficult concepts to make building AI/ML-supercharged applications approachable for traditional software engineers.
15
17
 
@@ -264,7 +266,7 @@ Agents are semi-autonomous bots that can respond to user questions and use avail
264
266
  Add `gem "ruby-openai"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
265
267
 
266
268
  ```ruby
267
- search_tool = Langchain::Tool::SerpApi.new(api_key: ENV["SERPAPI_API_KEY"])
269
+ search_tool = Langchain::Tool::GoogleSearch.new(api_key: ENV["SERPAPI_API_KEY"])
268
270
  calculator = Langchain::Tool::Calculator.new
269
271
 
270
272
  openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
@@ -275,7 +277,7 @@ agent = Langchain::Agent::ChainOfThoughtAgent.new(
275
277
  )
276
278
 
277
279
  agent.tools
278
- # => ["search", "calculator"]
280
+ # => ["google_search", "calculator"]
279
281
  ```
280
282
  ```ruby
281
283
  agent.run(question: "How many full soccer fields would be needed to cover the distance between NYC and DC in a straight line?")
@@ -308,7 +310,8 @@ agent.run(question: "How many users have a name with length greater than 5 in th
308
310
  | "calculator" | Useful for getting the result of a math expression | | `gem "eqn", "~> 1.6.5"` |
309
311
  | "database" | Useful for querying a SQL database | | `gem "sequel", "~> 5.68.0"` |
310
312
  | "ruby_code_interpreter" | Interprets Ruby expressions | | `gem "safe_ruby", "~> 1.0.4"` |
311
- | "search" | A wrapper around Google Search | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key) | `gem "google_search_results", "~> 2.0.0"` |
313
+ | "google_search" | A wrapper around Google Search | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key) | `gem "google_search_results", "~> 2.0.0"` |
314
+ | "weather" | Calls Open Weather API to retrieve the current weather | `ENV["OPEN_WEATHER_API_KEY"]` (https://home.openweathermap.org/api_keys) | `gem "open-weather-ruby-client", "~> 0.3.0"` |
312
315
  | "wikipedia" | Calls Wikipedia API to retrieve the summary | | `gem "wikipedia-client", "~> 1.17.0"` |
313
316
 
314
317
  #### Loaders 🚚
@@ -363,8 +366,8 @@ Langchain.logger.level = :info
363
366
  4. `bin/console` to load the gem in a REPL session. Feel free to add your own instances of LLMs, Tools, Agents, etc. and experiment with them.
364
367
  5. Optionally, install lefthook git hooks for pre-commit to auto lint: `gem install lefthook && lefthook install -f`
365
368
 
366
- ## Community
367
- Join us in the [Ruby AI Builders](https://discord.gg/SBmjAnKT) Discord community in #langchainrb
369
+ ## Discord
370
+ Join us in the [Langchain.rb](https://discord.gg/hXutDWGDd) Discord server.
368
371
 
369
372
  ## Core Contributors
370
373
  [<img style="border-radius:50%" alt="Andrei Bondarev" src="https://avatars.githubusercontent.com/u/541665?v=4" width="80" height="80" class="avatar">](https://github.com/andreibondarev)
@@ -3,7 +3,7 @@ require "langchain"
3
3
  # gem install pinecone
4
4
  # or add `gem "pinecone"` to your Gemfile
5
5
 
6
- # Instantiate the Qdrant client
6
+ # Instantiate the Pinecone client
7
7
  pinecone = Langchain::Vectorsearch::Pinecone.new(
8
8
  environment: ENV["PINECONE_ENVIRONMENT"],
9
9
  api_key: ENV["PINECONE_API_KEY"],
@@ -12,6 +12,7 @@ pinecone = Langchain::Vectorsearch::Pinecone.new(
12
12
  )
13
13
 
14
14
  # Create the default schema.
15
+ # If you are using the free Pinecone tier, ensure there is not an existing schema/index
15
16
  pinecone.create_default_schema
16
17
 
17
18
  # Set up an array of text strings
@@ -20,7 +21,7 @@ recipes = [
20
21
  "Heat oven to 190C/fan 170C/gas 5. Heat 1 tbsp oil and the butter in a frying pan, then add the onion and fry for 5 mins until softened. Cool slightly. Tip the sausagemeat, lemon zest, breadcrumbs, apricots, chestnuts and thyme into a bowl. Add the onion and cranberries, and mix everything together with your hands, adding plenty of pepper and a little salt. Cut each chicken breast into three fillets lengthwise and season all over with salt and pepper. Heat the remaining oil in the frying pan, and fry the chicken fillets quickly until browned, about 6-8 mins. Roll out two-thirds of the pastry to line a 20-23cm springform or deep loose-based tart tin. Press in half the sausage mix and spread to level. Then add the chicken pieces in one layer and cover with the rest of the sausage. Press down lightly. Roll out the remaining pastry. Brush the edges of the pastry with beaten egg and cover with the pastry lid. Pinch the edges to seal, then trim. Brush the top of the pie with egg, then roll out the trimmings to make holly leaf shapes and berries. Decorate the pie and brush again with egg. Set the tin on a baking sheet and bake for 50-60 mins, then cool in the tin for 15 mins. Remove and leave to cool completely. Serve with a winter salad and pickles."
21
22
  ]
22
23
 
23
- # Add data to the index. Weaviate will use OpenAI to generate embeddings behind the scene.
24
+ # Add data to the index. Pinecone will use OpenAI to generate embeddings behind the scene.
24
25
  pinecone.add_texts(
25
26
  texts: recipes
26
27
  )
@@ -33,10 +34,10 @@ pinecone.similarity_search(
33
34
 
34
35
  # Interact with your index through Q&A
35
36
  pinecone.ask(
36
- question: "What is the best recipe for chicken?"
37
+ question: "What is a good recipe for chicken?"
37
38
  )
38
39
 
39
- # Generate your an embedding and search by it
40
+ # Generate an embedding and search by it
40
41
  openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
41
42
  embedding = openai.embed(text: "veggie")
42
43
 
@@ -10,5 +10,10 @@ module Langchain::Agent
10
10
  #
11
11
  # @abstract
12
12
  class Base
13
+ def self.logger_options
14
+ {
15
+ color: :red
16
+ }
17
+ end
13
18
  end
14
19
  end
@@ -7,28 +7,30 @@ module Langchain::Agent
7
7
  #
8
8
  # agent = Langchain::Agent::ChainOfThoughtAgent.new(
9
9
  # llm: llm,
10
- # tools: ["search", "calculator", "wikipedia"]
10
+ # tools: ["google_search", "calculator", "wikipedia"]
11
11
  # )
12
12
  #
13
13
  # agent.tools
14
- # # => ["search", "calculator", "wikipedia"]
14
+ # # => ["google_search", "calculator", "wikipedia"]
15
15
  #
16
16
  # agent.run(question: "How many full soccer fields would be needed to cover the distance between NYC and DC in a straight line?")
17
17
  # #=> "Approximately 2,945 soccer fields would be needed to cover the distance between NYC and DC in a straight line."
18
18
  class ChainOfThoughtAgent < Base
19
- attr_reader :llm, :tools
19
+ attr_reader :llm, :tools, :max_iterations
20
20
 
21
21
  # Initializes the Agent
22
22
  #
23
23
  # @param llm [Object] The LLM client to use
24
24
  # @param tools [Array] The tools to use
25
+ # @param max_iterations [Integer] The maximum number of iterations to run
25
26
  # @return [ChainOfThoughtAgent] The Agent::ChainOfThoughtAgent instance
26
- def initialize(llm:, tools: [])
27
+ def initialize(llm:, tools: [], max_iterations: 10)
27
28
  Langchain::Tool::Base.validate_tools!(tools: tools)
28
29
 
29
30
  @tools = tools
30
31
 
31
32
  @llm = llm
33
+ @max_iterations = max_iterations
32
34
  end
33
35
 
34
36
  # Validate tools when they're re-assigned
@@ -51,8 +53,9 @@ module Langchain::Agent
51
53
  tools: tools
52
54
  )
53
55
 
54
- loop do
55
- Langchain.logger.info("[#{self.class.name}]".red + ": Sending the prompt to the #{llm.class} LLM")
56
+ final_response = nil
57
+ max_iterations.times do
58
+ Langchain.logger.info("Sending the prompt to the #{llm.class} LLM", for: self.class)
56
59
 
57
60
  response = llm.complete(prompt: prompt, stop_sequences: ["Observation:"])
58
61
 
@@ -68,7 +71,7 @@ module Langchain::Agent
68
71
 
69
72
  # Find the Tool and call `execute`` with action_input as the input
70
73
  tool = tools.find { |tool| tool.tool_name == action.strip }
71
- Langchain.logger.info("[#{self.class.name}]".red + ": Invoking \"#{tool.class}\" Tool with \"#{action_input}\"")
74
+ Langchain.logger.info("Invoking \"#{tool.class}\" Tool with \"#{action_input}\"", for: self.class)
72
75
 
73
76
  # Call `execute` with action_input as the input
74
77
  result = tool.execute(input: action_input)
@@ -81,9 +84,12 @@ module Langchain::Agent
81
84
  end
82
85
  else
83
86
  # Return the final answer
84
- break response.match(/Final Answer: (.*)/)&.send(:[], -1)
87
+ final_response = response.match(/Final Answer: (.*)/)&.send(:[], -1)
88
+ break
85
89
  end
86
90
  end
91
+
92
+ final_response || raise(MaxIterationsReachedError.new(max_iterations))
87
93
  end
88
94
 
89
95
  private
@@ -107,12 +113,18 @@ module Langchain::Agent
107
113
  )
108
114
  end
109
115
 
110
- # Load the PromptTemplate from the JSON file
116
+ # Load the PromptTemplate from the YAML file
111
117
  # @return [PromptTemplate] PromptTemplate instance
112
118
  def prompt_template
113
119
  @template ||= Langchain::Prompt.load_from_path(
114
- file_path: Langchain.root.join("langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json")
120
+ file_path: Langchain.root.join("langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.yaml")
115
121
  )
116
122
  end
123
+
124
+ class MaxIterationsReachedError < Langchain::Errors::BaseError
125
+ def initialize(max_iterations)
126
+ super("Agent stopped after #{max_iterations} iterations")
127
+ end
128
+ end
117
129
  end
118
130
  end
@@ -0,0 +1,26 @@
1
+ _type: prompt
2
+ template: |
3
+ Today is {date} and you can use tools to get new information. Answer the following questions as best you can using the following tools:
4
+
5
+ {tools}
6
+
7
+ Use the following format:
8
+
9
+ Question: the input question you must answer
10
+ Thought: you should always think about what to do
11
+ Action: the action to take, should be one of {tool_names}
12
+ Action Input: the input to the action
13
+ Observation: the result of the action
14
+ ... (this Thought/Action/Action Input/Observation can repeat N times)
15
+ Thought: I now know the final answer
16
+ Final Answer: the final answer to the original input question
17
+
18
+ Begin!
19
+
20
+ Question: {question}
21
+ Thought:
22
+ input_variables:
23
+ - date
24
+ - question
25
+ - tools
26
+ - tool_names
@@ -26,15 +26,15 @@ module Langchain::Agent
26
26
  prompt = create_prompt_for_sql(question: question)
27
27
 
28
28
  # Get the SQL string to execute
29
- Langchain.logger.info("[#{self.class.name}]".red + ": Passing the inital prompt to the #{llm.class} LLM")
29
+ Langchain.logger.info("Passing the initial prompt to the #{llm.class} LLM", for: self.class)
30
30
  sql_string = llm.complete(prompt: prompt)
31
31
 
32
32
  # Execute the SQL string and collect the results
33
- Langchain.logger.info("[#{self.class.name}]".red + ": Passing the SQL to the Database: #{sql_string}")
33
+ Langchain.logger.info("Passing the SQL to the Database: #{sql_string}", for: self.class)
34
34
  results = db.execute(input: sql_string)
35
35
 
36
36
  # Pass the results and get the LLM to synthesize the answer to the question
37
- Langchain.logger.info("[#{self.class.name}]".red + ": Passing the synthesize prompt to the #{llm.class} LLM with results: #{results}")
37
+ Langchain.logger.info("Passing the synthesize prompt to the #{llm.class} LLM with results: #{results}", for: self.class)
38
38
  prompt2 = create_prompt_for_answer(question: question, sql_query: sql_string, results: results)
39
39
  llm.complete(prompt: prompt2)
40
40
  end
@@ -52,11 +52,11 @@ module Langchain::Agent
52
52
  )
53
53
  end
54
54
 
55
- # Load the PromptTemplate from the JSON file
55
+ # Load the PromptTemplate from the YAML file
56
56
  # @return [PromptTemplate] PromptTemplate instance
57
57
  def prompt_template_sql
58
58
  Langchain::Prompt.load_from_path(
59
- file_path: Langchain.root.join("langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json")
59
+ file_path: Langchain.root.join("langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml")
60
60
  )
61
61
  end
62
62
 
@@ -71,11 +71,11 @@ module Langchain::Agent
71
71
  )
72
72
  end
73
73
 
74
- # Load the PromptTemplate from the JSON file
74
+ # Load the PromptTemplate from the YAML file
75
75
  # @return [PromptTemplate] PromptTemplate instance
76
76
  def prompt_template_answer
77
77
  Langchain::Prompt.load_from_path(
78
- file_path: Langchain.root.join("langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json")
78
+ file_path: Langchain.root.join("langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml")
79
79
  )
80
80
  end
81
81
  end
@@ -0,0 +1,11 @@
1
+ _type: prompt
2
+ template: |
3
+ Given an input question and results of a SQL query, look at the results and return the answer. Use the following format:
4
+ Question: {question}
5
+ The SQL query: {sql_query}
6
+ Result of the SQLQuery: {results}
7
+ Final answer: Final answer here
8
+ input_variables:
9
+ - question
10
+ - sql_query
11
+ - results
@@ -0,0 +1,21 @@
1
+ _type: prompt
2
+ template: |
3
+ Given an input question, create a syntactically correct {dialect} query to run, then return the query in valid SQL.
4
+ Never query for all the columns from a specific table, only ask for the few relevant columns given the question.
5
+ Pay attention to use only the column names that you can see in the schema description.
6
+ Be careful to not query for columns that do not exist.
7
+ Pay attention to which column is in which table.
8
+ Also, qualify column names with the table name when needed.
9
+
10
+ Only use the tables listed below.
11
+ {schema}
12
+
13
+ Use the following format:
14
+
15
+ Question: {question}
16
+
17
+ SQLQuery:
18
+ input_variables:
19
+ - dialect
20
+ - schema
21
+ - question
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Chunker
5
+ # = Chunkers
6
+ # Chunkers are used to split documents into smaller chunks before indexing into vector search databases.
7
+ # Otherwise large documents, when retrieved and passed to LLMs, may hit the context window limits.
8
+ #
9
+ # == Available chunkers
10
+ #
11
+ # - {Langchain::Chunker::Text}
12
+ class Base
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "baran"
4
+
5
+ module Langchain
6
+ module Chunker
7
+ #
8
+ # Simple text chunker
9
+ #
10
+ # Usage:
11
+ # Langchain::Chunker::Text.new(text).chunks
12
+ #
13
+ class Text < Base
14
+ attr_reader :text, :chunk_size, :chunk_overlap, :separator
15
+
16
+ # @param [String] text
17
+ # @param [Integer] chunk_size
18
+ # @param [Integer] chunk_overlap
19
+ # @param [String] separator
20
+ def initialize(text, chunk_size: 1000, chunk_overlap: 200, separator: "\n\n")
21
+ @text = text
22
+ @chunk_size = chunk_size
23
+ @chunk_overlap = chunk_overlap
24
+ @separator = separator
25
+ end
26
+
27
+ # @return [Array<String>]
28
+ def chunks
29
+ splitter = Baran::CharacterTextSplitter.new(
30
+ chunk_size: chunk_size,
31
+ chunk_overlap: chunk_overlap,
32
+ separator: separator
33
+ )
34
+ splitter.chunks(text)
35
+ end
36
+ end
37
+ end
38
+ end
@@ -0,0 +1,60 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ class ContextualLogger
5
+ MESSAGE_COLOR_OPTIONS = {
6
+ debug: {
7
+ color: :white
8
+ },
9
+ error: {
10
+ color: :red
11
+ },
12
+ fatal: {
13
+ color: :red,
14
+ background: :white,
15
+ mode: :bold
16
+ },
17
+ unknown: {
18
+ color: :white
19
+ },
20
+ info: {
21
+ color: :white
22
+ },
23
+ warn: {
24
+ color: :yellow,
25
+ mode: :bold
26
+ }
27
+ }
28
+
29
+ def initialize(logger)
30
+ @logger = logger
31
+ @levels = Logger::Severity.constants.map(&:downcase)
32
+ end
33
+
34
+ def respond_to_missing?(method, include_private = false)
35
+ @logger.respond_to?(method, include_private)
36
+ end
37
+
38
+ def method_missing(method, *args, **kwargs, &block)
39
+ return @logger.send(method, *args, **kwargs, &block) unless @levels.include?(method)
40
+
41
+ for_class = kwargs.delete(:for)
42
+ for_class_name = for_class&.name
43
+
44
+ log_line_parts = []
45
+ log_line_parts << "[LangChain.rb]".colorize(color: :yellow)
46
+ log_line_parts << if for_class.respond_to?(:logger_options)
47
+ "[#{for_class_name}]".colorize(for_class.logger_options) + ":"
48
+ elsif for_class_name
49
+ "[#{for_class_name}]:"
50
+ end
51
+ log_line_parts << args.first.colorize(MESSAGE_COLOR_OPTIONS[method])
52
+ log_line = log_line_parts.compact.join(" ")
53
+
54
+ @logger.send(
55
+ method,
56
+ log_line
57
+ )
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,97 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ #
5
+ # A high-level API for running a conversation with an LLM.
6
+ # Currently supports: OpenAI and Google PaLM LLMs.
7
+ #
8
+ # Usage:
9
+ # llm = Langchain::LLM::OpenAI.new(api_key: "YOUR_API_KEY")
10
+ # chat = Langchain::Conversation.new(llm: llm)
11
+ # chat.set_context("You are a chatbot from the future")
12
+ # chat.message("Tell me about future technologies")
13
+ #
14
+ # To stream the chat response:
15
+ # chat = Langchain::Conversation.new(llm: llm) do |chunk|
16
+ # print(chunk)
17
+ # end
18
+ #
19
+ class Conversation
20
+ attr_reader :context, :examples, :messages
21
+
22
+ # The least number of tokens we want to be under the limit by
23
+ TOKEN_LEEWAY = 20
24
+
25
+ # Initialize Conversation with an LLM
26
+ #
27
+ # @param llm [Object] The LLM to use for the conversation
28
+ # @param options [Hash] Options to pass to the LLM, like temperature, top_k, etc.
29
+ # @return [Langchain::Conversation] The Langchain::Conversation instance
30
+ def initialize(llm:, **options, &block)
31
+ @llm = llm
32
+ @context = nil
33
+ @examples = []
34
+ @messages = options.delete(:messages) || []
35
+ @options = options
36
+ @block = block
37
+ end
38
+
39
+ # Set the context of the conversation. Usually used to set the model's persona.
40
+ # @param message [String] The context of the conversation
41
+ def set_context(message)
42
+ @context = message
43
+ end
44
+
45
+ # Add examples to the conversation. Used to give the model a sense of the conversation.
46
+ # @param examples [Array<Hash>] The examples to add to the conversation
47
+ def add_examples(examples)
48
+ @examples.concat examples
49
+ end
50
+
51
+ # Message the model with a prompt and return the response.
52
+ # @param message [String] The prompt to message the model with
53
+ # @return [String] The response from the model
54
+ def message(message)
55
+ append_user_message(message)
56
+ response = llm_response(message)
57
+ append_ai_message(response)
58
+ response
59
+ end
60
+
61
+ private
62
+
63
+ def llm_response(prompt)
64
+ @llm.chat(messages: @messages, context: @context, examples: @examples, **@options, &@block)
65
+ rescue Langchain::Utils::TokenLength::TokenLimitExceeded => exception
66
+ raise exception if @messages.size == 1
67
+
68
+ reduce_messages(exception.token_overflow)
69
+ retry
70
+ end
71
+
72
+ def reduce_messages(token_overflow)
73
+ @messages = @messages.drop_while do |message|
74
+ proceed = token_overflow > -TOKEN_LEEWAY
75
+ token_overflow -= token_length(message.to_json, model_name, llm: @llm)
76
+
77
+ proceed
78
+ end
79
+ end
80
+
81
+ def append_ai_message(message)
82
+ @messages << {role: "ai", content: message}
83
+ end
84
+
85
+ def append_user_message(message)
86
+ @messages << {role: "user", content: message}
87
+ end
88
+
89
+ def model_name
90
+ @options[:model] || @llm.class::DEFAULTS[:chat_completion_model_name]
91
+ end
92
+
93
+ def token_length(content, model_name, options)
94
+ @llm.class::LENGTH_VALIDATOR.token_length(content, model_name, options)
95
+ end
96
+ end
97
+ end
@@ -12,5 +12,9 @@ module Langchain
12
12
  def value
13
13
  @data
14
14
  end
15
+
16
+ def chunks(opts = {})
17
+ Langchain::Chunker::Text.new(@data, **opts).chunks
18
+ end
15
19
  end
16
20
  end
@@ -24,6 +24,7 @@ module Langchain::LLM
24
24
  temperature: 0.0,
25
25
  dimension: 768 # This is what the `embedding-gecko-001` model generates
26
26
  }.freeze
27
+ LENGTH_VALIDATOR = Langchain::Utils::TokenLength::GooglePalmValidator
27
28
 
28
29
  def initialize(api_key:)
29
30
  depends_on "google_palm_api"
@@ -90,7 +91,7 @@ module Langchain::LLM
90
91
  examples: compose_examples(examples)
91
92
  }
92
93
 
93
- Langchain::Utils::TokenLength::GooglePalmValidator.validate_max_tokens!(self, default_params[:messages], "chat-bison-001")
94
+ LENGTH_VALIDATOR.validate_max_tokens!(default_params[:messages], "chat-bison-001", llm: self)
94
95
 
95
96
  if options[:stop_sequences]
96
97
  default_params[:stop] = options.delete(:stop_sequences)
@@ -116,7 +117,7 @@ module Langchain::LLM
116
117
  #
117
118
  def summarize(text:)
118
119
  prompt_template = Langchain::Prompt.load_from_path(
119
- file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.json")
120
+ file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.yaml")
120
121
  )
121
122
  prompt = prompt_template.format(text: text)
122
123