langchainrb 0.5.7 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ba5e9e8257d18c0940fdaf4fe84c03d594d8f1151e40e1bb35de059f8e6e5094
4
- data.tar.gz: 11310635819502b9bfbd66bc45dc7aa1ce500d4a874dcc5ab550d6c5edf7194f
3
+ metadata.gz: 7fba7b5e03ae75aa4ee3c89dd0322a73bbb9d3ced79f48dda8861af1f4f197b8
4
+ data.tar.gz: 404e742b0911305beec3bd22575740fc78ed9005e21295e0f7c348c1bede3e7e
5
5
  SHA512:
6
- metadata.gz: 4b97e21bcbc0c5f1d842271b64949c07d6d78190cd97c22fd0dab735d6b6ae2f2e6328ba2631dfc77ed0a5dd227573e3f84f064e8dd9332701848a798747ac9a
7
- data.tar.gz: 267b2029de10acf45bb97a040d174102f666e048aaaf03ab76218cd5281574c1ae977ba8e975faf4b690e677611daba2fb0fc975801c0e41072f050ec2ac2e34
6
+ metadata.gz: c8166375c28abe9bc3a7e02a2ceba2ce1bea8ff4f751beb2c69d6f34aa46ba2b7c6ca34458f79b6dfba6d8908cc5e9b055f4e4e7dc6c4c09ac65f7f589c12eb7
7
+ data.tar.gz: fb81a51867575c5fae10b79f0f3ee761b25b75d623f2c071d000b39f84c672d9f7164e3c8442a1f11d7abdff8edd41f7e79c2c3a7cb99fa9952406bc60506ce8
data/CHANGELOG.md CHANGED
@@ -1,5 +1,10 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.0] - 2023-06-22
4
+ - [BREAKING] Rename `ChainOfThoughtAgent` to `ReActAgent`
5
+ - Implement AI21 token validator
6
+ - Add `Langchain::OutputParsers`
7
+
3
8
  ## [0.5.7] - 2023-06-19
4
9
  - Developer can modify models used when initializing `Langchain::LLM::*` clients
5
10
  - Improvements to the `SQLQueryAgent` and the database tool
data/Gemfile.lock CHANGED
@@ -1,9 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.5.7)
4
+ langchainrb (0.6.0)
5
5
  baran (~> 0.1.6)
6
6
  colorize (~> 0.8.1)
7
+ json-schema (~> 4.0.0)
7
8
  tiktoken_ruby (~> 0.0.5)
8
9
 
9
10
  GEM
@@ -31,7 +32,7 @@ GEM
31
32
  addressable (2.8.4)
32
33
  public_suffix (>= 2.0.2, < 6.0)
33
34
  afm (0.2.2)
34
- ai21 (0.2.0)
35
+ ai21 (0.2.1)
35
36
  ast (2.4.2)
36
37
  baran (0.1.6)
37
38
  builder (3.2.4)
@@ -41,7 +42,7 @@ GEM
41
42
  dry-monads (~> 1.6)
42
43
  ruby-next-core (>= 0.15.0)
43
44
  coderay (1.1.3)
44
- cohere-ruby (0.9.4)
45
+ cohere-ruby (0.9.5)
45
46
  faraday (>= 1.0.0)
46
47
  faraday_middleware (>= 1.0.0)
47
48
  colorize (0.8.1)
@@ -124,7 +125,7 @@ GEM
124
125
  faraday-retry (1.0.3)
125
126
  faraday_middleware (1.2.0)
126
127
  faraday (~> 1.0)
127
- google_palm_api (0.1.1)
128
+ google_palm_api (0.1.2)
128
129
  faraday (>= 1.0.0)
129
130
  faraday_middleware (>= 1.0.0)
130
131
  google_search_results (2.0.1)
@@ -148,6 +149,8 @@ GEM
148
149
  concurrent-ruby (~> 1.0)
149
150
  ice_nine (0.11.2)
150
151
  json (2.6.3)
152
+ json-schema (4.0.0)
153
+ addressable (>= 2.8)
151
154
  language_server-protocol (3.17.0.3)
152
155
  lint_roller (1.0.0)
153
156
  loofah (2.21.1)
@@ -219,7 +222,7 @@ GEM
219
222
  zeitwerk (~> 2.5)
220
223
  rainbow (3.1.1)
221
224
  rake (13.0.6)
222
- rb_sys (0.9.78)
225
+ rb_sys (0.9.79)
223
226
  rdiscount (2.2.7)
224
227
  regexp_parser (2.8.0)
225
228
  replicate-ruby (0.2.2)
@@ -313,13 +316,13 @@ PLATFORMS
313
316
  x86_64-linux
314
317
 
315
318
  DEPENDENCIES
316
- ai21 (~> 0.2.0)
319
+ ai21 (~> 0.2.1)
317
320
  chroma-db (~> 0.3.0)
318
- cohere-ruby (~> 0.9.4)
321
+ cohere-ruby (~> 0.9.5)
319
322
  docx (~> 0.8.0)
320
323
  dotenv-rails (~> 2.7.6)
321
324
  eqn (~> 1.6.5)
322
- google_palm_api (~> 0.1.1)
325
+ google_palm_api (~> 0.1.2)
323
326
  google_search_results (~> 2.0.0)
324
327
  hnswlib (~> 0.8.1)
325
328
  hugging-face (~> 0.3.4)
data/README.md CHANGED
@@ -155,13 +155,13 @@ replicate = Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
155
155
  ```
156
156
 
157
157
  #### Google PaLM (Pathways Language Model)
158
- Add `"google_palm_api", "~> 0.1.1"` to your Gemfile.
158
+ Add `"google_palm_api", "~> 0.1.2"` to your Gemfile.
159
159
  ```ruby
160
160
  google_palm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
161
161
  ```
162
162
 
163
163
  #### AI21
164
- Add `gem "ai21", "~> 0.2.0"` to your Gemfile.
164
+ Add `gem "ai21", "~> 0.2.1"` to your Gemfile.
165
165
  ```ruby
166
166
  ai21 = Langchain::LLM::AI21.new(api_key: ENV["AI21_API_KEY"])
167
167
  ```
@@ -261,7 +261,7 @@ prompt.input_variables #=> ["adjective", "content"]
261
261
  ### Using Agents 🤖
262
262
  Agents are semi-autonomous bots that can respond to user questions and use the Tools available to them to provide informed replies. They break down problems into a series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer, it responds with it.
263
263
 
264
- #### Chain-of-Thought Agent
264
+ #### ReAct Agent
265
265
 
266
266
  Add `gem "ruby-openai"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
267
267
 
@@ -271,7 +271,7 @@ calculator = Langchain::Tool::Calculator.new
271
271
 
272
272
  openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
273
273
 
274
- agent = Langchain::Agent::ChainOfThoughtAgent.new(
274
+ agent = Langchain::Agent::ReActAgent.new(
275
275
  llm: openai,
276
276
  tools: [search_tool, calculator]
277
277
  )
@@ -0,0 +1,104 @@
1
+ require "langchain"
2
+
3
+ # Generate a prompt that directs the LLM to provide a JSON response that adheres to a specific JSON schema.
4
+ json_schema = {
5
+ type: "object",
6
+ properties: {
7
+ name: {
8
+ type: "string",
9
+ description: "Persons name"
10
+ },
11
+ age: {
12
+ type: "number",
13
+ description: "Persons age"
14
+ },
15
+ interests: {
16
+ type: "array",
17
+ items: {
18
+ type: "object",
19
+ properties: {
20
+ interest: {
21
+ type: "string",
22
+ description: "A topic of interest"
23
+ },
24
+ levelOfInterest: {
25
+ type: "number",
26
+ description: "A value between 0 and 100 of how interested the person is in this interest"
27
+ }
28
+ },
29
+ required: ["interest", "levelOfInterest"],
30
+ additionalProperties: false
31
+ },
32
+ minItems: 1,
33
+ maxItems: 3,
34
+ description: "A list of the person's interests"
35
+ }
36
+ },
37
+ required: ["name", "age", "interests"],
38
+ additionalProperties: false
39
+ }
40
+ parser = Langchain::OutputParsers::StructuredOutputParser.from_json_schema(json_schema)
41
+ prompt = Langchain::Prompt::PromptTemplate.new(template: "Generate details of a fictional character.\n{format_instructions}\nCharacter description: {description}", input_variables: ["description", "format_instructions"])
42
+ prompt.format(description: "Korean chemistry student", format_instructions: parser.get_format_instructions)
43
+ # Generate details of a fictional character.
44
+ # You must format your output as a JSON value that adheres to a given "JSON Schema" instance.
45
+
46
+ # "JSON Schema" is a declarative language that allows you to annotate and validate JSON documents.
47
+
48
+ # For example, the example "JSON Schema" instance {"properties": {"foo": {"description": "a list of test words", "type": "array", "items": {"type": "string"}}, "required": ["foo"]}
49
+ # would match an object with one required property, "foo". The "type" property specifies "foo" must be an "array", and the "description" property semantically describes it as "a list of test words". The items within "foo" must be strings.
50
+ # Thus, the object {"foo": ["bar", "baz"]} is a well-formatted instance of this example "JSON Schema". The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.
51
+
52
+ # Your output will be parsed and type-checked according to the provided schema instance, so make sure all fields in your output match the schema exactly and there are no trailing commas!
53
+
54
+ # Here is the JSON Schema instance your output must adhere to. Include the enclosing markdown codeblock:
55
+ # ```json
56
+ # {"type":"object","properties":{"name":{"type":"string","description":"Persons name"},"age":{"type":"number","description":"Persons age"},"interests":{"type":"array","items":{"type":"object","properties":{"interest":{"type":"string","description":"A topic of interest"},"levelOfInterest":{"type":"number","description":"A value between 0 and 100 of how interested the person is in this interest"},"required":["interest","levelOfInterest"],"additionalProperties":false},"minItems":1,"maxItems":3,"description":"A list of the person's interests"},"required":["name","age","interests"],"additionalProperties":false}
57
+ # ```
58
+
59
+ # Character description: Korean chemistry student
60
+
61
+ # LLM example response:
62
+ llm_example_response = <<~RESPONSE
63
+ Here is your character:
64
+ ```json
65
+ {
66
+ "name": "Kim Ji-hyun",
67
+ "age": 22,
68
+ "interests": [
69
+ {
70
+ "interest": "Organic Chemistry",
71
+ "levelOfInterest": 85
72
+ },
73
+ {
74
+ "interest": "Biochemistry",
75
+ "levelOfInterest": 70
76
+ },
77
+ {
78
+ "interest": "Analytical Chemistry",
79
+ "levelOfInterest": 60
80
+ }
81
+ ]
82
+ }
83
+ ```
84
+ RESPONSE
85
+
86
+ parser.parse(llm_example_response)
87
+ # {
88
+ # "name" => "Kim Ji-hyun",
89
+ # "age" => 22,
90
+ # "interests" => [
91
+ # {
92
+ # "interest" => "Organic Chemistry",
93
+ # "levelOfInterest" => 85
94
+ # },
95
+ # {
96
+ # "interest" => "Biochemistry",
97
+ # "levelOfInterest" => 70
98
+ # },
99
+ # {
100
+ # "interest" => "Analytical Chemistry",
101
+ # "levelOfInterest" => 60
102
+ # }
103
+ # ]
104
+ # }
@@ -6,7 +6,7 @@ module Langchain::Agent
6
6
  # Agents are semi-autonomous bots that can respond to user questions and use the Tools available to them to provide informed replies. They break down problems into a series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer, it responds with it.
7
7
  #
8
8
  # Available:
9
- # - {Langchain::Agent::ChainOfThoughtAgent}
9
+ # - {Langchain::Agent::ReActAgent}
10
10
  #
11
11
  # @abstract
12
12
  class Base
@@ -1,11 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain::Agent
4
- # = Chain of Thought Agent
4
+ # = ReAct Agent
5
5
  #
6
6
  # llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]) # or your choice of Langchain::LLM::Base implementation
7
7
  #
8
- # agent = Langchain::Agent::ChainOfThoughtAgent.new(
8
+ # agent = Langchain::Agent::ReActAgent.new(
9
9
  # llm: llm,
10
10
  # tools: ["google_search", "calculator", "wikipedia"]
11
11
  # )
@@ -15,7 +15,7 @@ module Langchain::Agent
15
15
  #
16
16
  # agent.run(question: "How many full soccer fields would be needed to cover the distance between NYC and DC in a straight line?")
17
17
  # #=> "Approximately 2,945 soccer fields would be needed to cover the distance between NYC and DC in a straight line."
18
- class ChainOfThoughtAgent < Base
18
+ class ReActAgent < Base
19
19
  attr_reader :llm, :tools, :max_iterations
20
20
 
21
21
  # Initializes the Agent
@@ -23,7 +23,7 @@ module Langchain::Agent
23
23
  # @param llm [Object] The LLM client to use
24
24
  # @param tools [Array] The tools to use
25
25
  # @param max_iterations [Integer] The maximum number of iterations to run
26
- # @return [ChainOfThoughtAgent] The Agent::ChainOfThoughtAgent instance
26
+ # @return [ReActAgent] The Agent::ReActAgent instance
27
27
  def initialize(llm:, tools: [], max_iterations: 10)
28
28
  Langchain::Tool::Base.validate_tools!(tools: tools)
29
29
 
@@ -117,7 +117,7 @@ module Langchain::Agent
117
117
  # @return [PromptTemplate] PromptTemplate instance
118
118
  def prompt_template
119
119
  @template ||= Langchain::Prompt.load_from_path(
120
- file_path: Langchain.root.join("langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.yaml")
120
+ file_path: Langchain.root.join("langchain/agent/react_agent/react_agent_prompt.yaml")
121
121
  )
122
122
  end
123
123
 
@@ -1,18 +1,26 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
+ # Abstraction for data loaded by a {Langchain::Loader}
4
5
  class Data
6
+ # URL or Path of the data source
7
+ # @return [String]
5
8
  attr_reader :source
6
9
 
10
+ # @param data [String] data that was loaded
11
+ # @option options [String] :source URL or Path of the data source
7
12
  def initialize(data, options = {})
8
13
  @source = options[:source]
9
14
  @data = data
10
15
  end
11
16
 
17
+ # @return [String]
12
18
  def value
13
19
  @data
14
20
  end
15
21
 
22
+ # @param opts [Hash] options passed to the chunker
23
+ # @return [Array<String>]
16
24
  def chunks(opts = {})
17
25
  Langchain::Chunker::Text.new(@data, **opts).chunks
18
26
  end
@@ -5,7 +5,7 @@ module Langchain::LLM
5
5
  # Wrapper around AI21 Studio APIs.
6
6
  #
7
7
  # Gem requirements:
8
- # gem "ai21", "~> 0.2.0"
8
+ # gem "ai21", "~> 0.2.1"
9
9
  #
10
10
  # Usage:
11
11
  # ai21 = Langchain::LLM::AI21.new(api_key:)
@@ -13,9 +13,11 @@ module Langchain::LLM
13
13
  class AI21 < Base
14
14
  DEFAULTS = {
15
15
  temperature: 0.0,
16
- model: "j2-large"
16
+ model: "j2-ultra"
17
17
  }.freeze
18
18
 
19
+ LENGTH_VALIDATOR = Langchain::Utils::TokenLength::AI21Validator
20
+
19
21
  def initialize(api_key:, default_options: {})
20
22
  depends_on "ai21"
21
23
  require "ai21"
@@ -34,6 +36,8 @@ module Langchain::LLM
34
36
  def complete(prompt:, **params)
35
37
  parameters = complete_parameters params
36
38
 
39
+ parameters[:maxTokens] = LENGTH_VALIDATOR.validate_max_tokens!(prompt, parameters[:model], client)
40
+
37
41
  response = client.complete(prompt, parameters)
38
42
  response.dig(:completions, 0, :data, :text)
39
43
  end
@@ -13,9 +13,10 @@ module Langchain::LLM
13
13
  class Cohere < Base
14
14
  DEFAULTS = {
15
15
  temperature: 0.0,
16
- completion_model_name: "base",
16
+ completion_model_name: "command",
17
17
  embeddings_model_name: "small",
18
- dimension: 1024
18
+ dimension: 1024,
19
+ truncate: "START"
19
20
  }.freeze
20
21
 
21
22
  def initialize(api_key:, default_options: {})
@@ -51,7 +52,8 @@ module Langchain::LLM
51
52
  default_params = {
52
53
  prompt: prompt,
53
54
  temperature: @defaults[:temperature],
54
- model: @defaults[:completion_model_name]
55
+ model: @defaults[:completion_model_name],
56
+ truncate: @defaults[:truncate]
55
57
  }
56
58
 
57
59
  if params[:stop_sequences]
@@ -60,6 +62,8 @@ module Langchain::LLM
60
62
 
61
63
  default_params.merge!(params)
62
64
 
65
+ default_params[:max_tokens] = Langchain::Utils::TokenLength::CohereValidator.validate_max_tokens!(prompt, default_params[:model], client)
66
+
63
67
  response = client.generate(**default_params)
64
68
  response.dig("generations").first.dig("text")
65
69
  end
@@ -5,21 +5,12 @@ module Langchain::LLM
5
5
  # Wrapper around the Google PaLM (Pathways Language Model) APIs: https://ai.google/build/machine-learning/
6
6
  #
7
7
  # Gem requirements:
8
- # gem "google_palm_api", "~> 0.1.0"
8
+ # gem "google_palm_api", "~> 0.1.2"
9
9
  #
10
10
  # Usage:
11
11
  # google_palm = Langchain::LLM::GooglePalm.new(api_key: "YOUR_API_KEY")
12
12
  #
13
13
  class GooglePalm < Base
14
- #
15
- # Wrapper around the Google PaLM (Pathways Language Model) APIs.
16
- #
17
- # Gem requirements: gem "google_palm_api", "~> 0.1.1"
18
- #
19
- # Usage:
20
- # google_palm = Langchain::LLM::GooglePalm.new(api_key: "YOUR_API_KEY")
21
- #
22
-
23
14
  DEFAULTS = {
24
15
  temperature: 0.0,
25
16
  dimension: 768, # This is what the `embedding-gecko-001` model generates
@@ -61,7 +52,7 @@ module Langchain::LLM
61
52
  default_params = {
62
53
  prompt: prompt,
63
54
  temperature: @defaults[:temperature],
64
- completion_model_name: @defaults[:completion_model_name]
55
+ model: @defaults[:completion_model_name]
65
56
  }
66
57
 
67
58
  if params[:stop_sequences]
@@ -91,7 +82,7 @@ module Langchain::LLM
91
82
 
92
83
  default_params = {
93
84
  temperature: @defaults[:temperature],
94
- chat_completion_model_name: @defaults[:chat_completion_model_name],
85
+ model: @defaults[:chat_completion_model_name],
95
86
  context: context,
96
87
  messages: compose_chat_messages(prompt: prompt, messages: messages),
97
88
  examples: compose_examples(examples)
@@ -10,32 +10,64 @@ module Langchain
10
10
 
11
11
  URI_REGEX = %r{\A[A-Za-z][A-Za-z0-9+\-.]*://}
12
12
 
13
- # Load data from a file or url
14
- # Equivalent to Langchain::Loader.new(path).load
15
- # @param path [String | Pathname] path to file or url
16
- # @return [String] file content
13
+ # Load data from a file or URL. Shorthand for `Langchain::Loader.new(path).load`
14
+ #
15
+ # == Examples
16
+ #
17
+ # # load a URL
18
+ # data = Langchain::Loader.load("https://example.com/docs/README.md")
19
+ #
20
+ # # load a file
21
+ # data = Langchain::Loader.load("README.md")
22
+ #
23
+ # # Load data using a custom processor
24
+ # data = Langchain::Loader.load("README.md") do |raw_data, options|
25
+ # # your processing code goes here
26
+ # # return data at the end here
27
+ # end
28
+ #
29
+ # @param path [String | Pathname] path to file or URL
30
+ # @param options [Hash] options passed to the processor class used to process the data
31
+ # @return [Data] data loaded from path
17
32
  def self.load(path, options = {}, &block)
18
33
  new(path, options).load(&block)
19
34
  end
20
35
 
21
36
  # Initialize Langchain::Loader
22
- # @param path [String | Pathname] path to file or url
37
+ # @param path [String | Pathname] path to file or URL
38
+ # @param options [Hash] options passed to the processor class used to process the data
23
39
  # @return [Langchain::Loader] loader instance
24
40
  def initialize(path, options = {})
25
41
  @options = options
26
42
  @path = path
27
43
  end
28
44
 
29
- # Check if path is url
30
- # @return [Boolean] true if path is url
45
+ # Is the path a URL?
46
+ #
47
+ # @return [Boolean] true if path is URL
31
48
  def url?
32
49
  return false if @path.is_a?(Pathname)
33
50
 
34
51
  !!(@path =~ URI_REGEX)
35
52
  end
36
53
 
37
- # Load data from a file or url
38
- # @return [String] file content
54
+ # Load data from a file or URL
55
+ #
56
+ # loader = Langchain::Loader.new("README.md")
57
+ # # Load data using default processor for the file
58
+ # loader.load
59
+ #
60
+ # # Load data using a custom processor
61
+ # loader.load do |raw_data, options|
62
+ # # your processing code goes here
63
+ # # return data at the end here
64
+ # end
65
+ #
66
+ # @yield [String, Hash] handle parsing raw output into string directly
67
+ # @yieldparam [String] raw_data from the loaded URL or file
68
+ # @yieldreturn [String] parsed data, as a String
69
+ #
70
+ # @return [Data] data that was loaded
39
71
  def load(&block)
40
72
  @raw_data = url? ? load_from_url : load_from_path
41
73
 
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain::OutputParsers
4
+ # Structured output parsers from the LLM.
5
+ #
6
+ # @abstract
7
+ class Base
8
+ #
9
+ # Parse the output of an LLM call.
10
+ #
11
+ # @param text - LLM output to parse.
12
+ # @return Parsed output.
13
+ #
14
+ def parse(text:)
15
+ raise NotImplementedError
16
+ end
17
+
18
+ #
19
+ # Return a string describing the format of the output.
20
+ #
21
+ # @return Format instructions.
22
+ # @param options - Options for formatting instructions.
23
+ # @example
24
+ # ```json
25
+ # {
26
+ # "foo": "bar"
27
+ # }
28
+ # ```
29
+ #
30
+ def get_format_instructions
31
+ raise NotImplementedError
32
+ end
33
+ end
34
+
35
+ class OutputParserException < StandardError
36
+ def initialize(message, text)
37
+ @message = message
38
+ @text = text
39
+ end
40
+
41
+ def to_s
42
+ "#{@message}\nText: #{@text}"
43
+ end
44
+ end
45
+ end
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+ require "json-schema"
5
+
6
+ module Langchain::OutputParsers
7
+ # = Structured Output Parser
8
+ #
9
+ class StructuredOutputParser < Base
10
+ attr_reader :schema
11
+
12
+ #
13
+ # Initializes a new instance of the class.
14
+ #
15
+ # @param schema [JSON::Schema] The json schema
16
+ #
17
+ def initialize(schema:)
18
+ @schema = validate_schema!(schema)
19
+ end
20
+
21
+ def to_h
22
+ {
23
+ _type: "StructuredOutputParser",
24
+ schema: schema.to_json
25
+ }
26
+ end
27
+
28
+ #
29
+ # Creates a new instance of the class using the given JSON::Schema.
30
+ #
31
+ # @param schema [JSON::Schema] The JSON::Schema to use
32
+ #
33
+ # @return [Object] A new instance of the class
34
+ #
35
+ def self.from_json_schema(schema)
36
+ new(schema: schema)
37
+ end
38
+
39
+ #
40
+ # Returns a string containing instructions for how the output of a language model should be formatted
41
+ # according to the @schema.
42
+ #
43
+ # @return [String] Instructions for how the output of a language model should be formatted
44
+ # according to the @schema.
45
+ #
46
+ def get_format_instructions
47
+ <<~INSTRUCTIONS
48
+ You must format your output as a JSON value that adheres to a given "JSON Schema" instance.
49
+
50
+ "JSON Schema" is a declarative language that allows you to annotate and validate JSON documents.
51
+
52
+ For example, the example "JSON Schema" instance {"properties": {"foo": {"description": "a list of test words", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}}
53
+ would match an object with one required property, "foo". The "type" property specifies "foo" must be an "array", and the "description" property semantically describes it as "a list of test words". The items within "foo" must be strings.
54
+ Thus, the object {"foo": ["bar", "baz"]} is a well-formatted instance of this example "JSON Schema". The object {"properties": {"foo": ["bar", "baz"]}}} is not well-formatted.
55
+
56
+ Your output will be parsed and type-checked according to the provided schema instance, so make sure all fields in your output match the schema exactly and there are no trailing commas!
57
+
58
+ Here is the JSON Schema instance your output must adhere to. Include the enclosing markdown codeblock:
59
+ ```json
60
+ #{schema.to_json}
61
+ ```
62
+ INSTRUCTIONS
63
+ end
64
+
65
+ #
66
+ # Parse the output of an LLM call extracting an object that abides by the @schema
67
+ #
68
+ # @param text [String] Text output from the LLM call
69
+ #
70
+ # @return [Object] object that abides by the @schema
71
+ #
72
+ def parse(text)
73
+ json = text.include?("```") ? text.strip.split(/```(?:json)?/)[1] : text.strip
74
+ parsed = JSON.parse(json)
75
+ JSON::Validator.validate!(schema, parsed)
76
+ parsed
77
+ rescue => e
78
+ raise OutputParserException.new("Failed to parse. Text: \"#{text}\". Error: #{e}", text)
79
+ end
80
+
81
+ private
82
+
83
+ def validate_schema!(schema)
84
+ errors = JSON::Validator.fully_validate_schema(schema)
85
+ unless errors.empty?
86
+ raise ArgumentError, "Invalid schema: \n#{errors.join("\n")}"
87
+ end
88
+ schema
89
+ end
90
+ end
91
+ end
@@ -27,7 +27,7 @@ module Langchain::Tool
27
27
  #
28
28
  # 3. Pass the tools when Agent is instantiated.
29
29
  #
30
- # agent = Langchain::Agent::ChainOfThoughtAgent.new(
30
+ # agent = Langchain::Agent::ReActAgent.new(
31
31
  # llm: :openai, # or :cohere, :hugging_face, :google_palm or :replicate
32
32
  # llm_api_key: ENV["OPENAI_API_KEY"],
33
33
  # tools: ["google_search", "calculator", "wikipedia"]
@@ -14,7 +14,7 @@ module Langchain::Tool
14
14
  The input to this tool should be valid SQL.
15
15
  DESC
16
16
 
17
- attr_reader :db, :requested_tables, :except_tables
17
+ attr_reader :db, :requested_tables, :excluded_tables
18
18
 
19
19
  #
20
20
  # Establish a database connection
@@ -25,16 +25,15 @@ module Langchain::Tool
25
25
 
26
26
  # @return [Database] Database object
27
27
  #
28
- def initialize(connection_string:, tables: [], except_tables: [])
28
+ def initialize(connection_string:, tables: [], exclude_tables: [])
29
29
  depends_on "sequel"
30
30
  require "sequel"
31
- require "sequel/extensions/schema_dumper"
32
31
 
33
32
  raise StandardError, "connection_string parameter cannot be blank" if connection_string.empty?
34
33
 
35
34
  @db = Sequel.connect(connection_string)
36
35
  @requested_tables = tables
37
- @except_tables = except_tables
36
+ @excluded_tables = exclude_tables
38
37
  end
39
38
 
40
39
  #
@@ -46,20 +45,31 @@ module Langchain::Tool
46
45
  Langchain.logger.info("Dumping schema tables and keys", for: self.class)
47
46
  schema = ""
48
47
  db.tables.each do |table|
49
- next if except_tables.include?(table)
48
+ next if excluded_tables.include?(table)
50
49
  next unless requested_tables.empty? || requested_tables.include?(table)
51
50
 
51
+ primary_key_columns = []
52
+ primary_key_column_count = db.schema(table).count { |column| column[1][:primary_key] == true }
53
+
52
54
  schema << "CREATE TABLE #{table}(\n"
53
55
  db.schema(table).each do |column|
54
56
  schema << "#{column[0]} #{column[1][:type]}"
55
- schema << " PRIMARY KEY" if column[1][:primary_key] == true
56
- schema << "," unless column == db.schema(table).last
57
- schema << "\n"
57
+ if column[1][:primary_key] == true
58
+ schema << " PRIMARY KEY" if primary_key_column_count == 1
59
+ else
60
+ primary_key_columns << column[0]
61
+ end
62
+ schema << ",\n" unless column == db.schema(table).last && primary_key_column_count == 1
63
+ end
64
+ if primary_key_column_count > 1
65
+ schema << "PRIMARY KEY (#{primary_key_columns.join(",")})"
58
66
  end
59
- schema << ");\n"
60
67
  db.foreign_key_list(table).each do |fk|
61
- schema << "ALTER TABLE #{table} ADD FOREIGN KEY (#{fk[:columns][0]}) REFERENCES #{fk[:table]}(#{fk[:key][0]});\n"
68
+ schema << ",\n" if fk == db.foreign_key_list(table).first
69
+ schema << "FOREIGN KEY (#{fk[:columns][0]}) REFERENCES #{fk[:table]}(#{fk[:key][0]})"
70
+ schema << ",\n" unless fk == db.foreign_key_list(table).last
62
71
  end
72
+ schema << ");\n"
63
73
  end
64
74
  schema
65
75
  end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Utils
5
+ module TokenLength
6
+ #
7
+ # This class is meant to validate the length of the text passed in to AI21's API.
8
+ # It is used to validate the token length before the API call is made
9
+ #
10
+
11
+ class AI21Validator < BaseValidator
12
+ TOKEN_LIMITS = {
13
+ "j2-ultra" => 8192,
14
+ "j2-mid" => 8192,
15
+ "j2-light" => 8192
16
+ }.freeze
17
+
18
+ #
19
+ # Calculate token length for a given text and model name
20
+ #
21
+ # @param text [String] The text to calculate the token length for
22
+ # @param model_name [String] The model name to validate against
23
+ # @return [Integer] The token length of the text
24
+ #
25
+ def self.token_length(text, model_name, client)
26
+ res = client.tokenize(text)
27
+ res.dig(:tokens).length
28
+ end
29
+
30
+ def self.token_limit(model_name)
31
+ TOKEN_LIMITS[model_name]
32
+ end
33
+ end
34
+ end
35
+ end
36
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Langchain
4
+ module Utils
5
+ module TokenLength
6
+ #
7
+ # This class is meant to validate the length of the text passed in to Cohere's API.
8
+ # It is used to validate the token length before the API call is made
9
+ #
10
+
11
+ class CohereValidator < BaseValidator
12
+ TOKEN_LIMITS = {
13
+ # Source:
14
+ # https://docs.cohere.com/docs/models
15
+ "command-light" => 4096,
16
+ "command" => 4096,
17
+ "base-light" => 2048,
18
+ "base" => 2048,
19
+ "embed-english-light-v2.0" => 512,
20
+ "embed-english-v2.0" => 512,
21
+ "embed-multilingual-v2.0" => 256,
22
+ "summarize-medium" => 2048,
23
+ "summarize-xlarge" => 2048
24
+ }.freeze
25
+
26
+ #
27
+ # Calculate token length for a given text and model name
28
+ #
29
+ # @param text [String] The text to calculate the token length for
30
+ # @param model_name [String] The model name to validate against
31
+ # @return [Integer] The token length of the text
32
+ #
33
+ def self.token_length(text, model_name, client)
34
+ res = client.tokenize(text: text)
35
+ res["tokens"].length
36
+ end
37
+
38
+ def self.token_limit(model_name)
39
+ TOKEN_LIMITS[model_name]
40
+ end
41
+ end
42
+ end
43
+ end
44
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.5.7"
4
+ VERSION = "0.6.0"
5
5
  end
data/lib/langchain.rb CHANGED
@@ -74,7 +74,7 @@ module Langchain
74
74
 
75
75
  module Agent
76
76
  autoload :Base, "langchain/agent/base"
77
- autoload :ChainOfThoughtAgent, "langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb"
77
+ autoload :ReActAgent, "langchain/agent/react_agent/react_agent.rb"
78
78
  autoload :SQLQueryAgent, "langchain/agent/sql_query_agent/sql_query_agent.rb"
79
79
  end
80
80
 
@@ -108,9 +108,11 @@ module Langchain
108
108
  module Utils
109
109
  module TokenLength
110
110
  autoload :BaseValidator, "langchain/utils/token_length/base_validator"
111
- autoload :TokenLimitExceeded, "langchain/utils/token_length/token_limit_exceeded"
112
- autoload :OpenAIValidator, "langchain/utils/token_length/openai_validator"
111
+ autoload :AI21Validator, "langchain/utils/token_length/ai21_validator"
112
+ autoload :CohereValidator, "langchain/utils/token_length/cohere_validator"
113
113
  autoload :GooglePalmValidator, "langchain/utils/token_length/google_palm_validator"
114
+ autoload :OpenAIValidator, "langchain/utils/token_length/openai_validator"
115
+ autoload :TokenLimitExceeded, "langchain/utils/token_length/token_limit_exceeded"
114
116
  end
115
117
  end
116
118
 
@@ -143,6 +145,11 @@ module Langchain
143
145
  autoload :FewShotPromptTemplate, "langchain/prompt/few_shot_prompt_template"
144
146
  end
145
147
 
148
+ module OutputParsers
149
+ autoload :Base, "langchain/output_parsers/base"
150
+ autoload :StructuredOutputParser, "langchain/output_parsers/structured"
151
+ end
152
+
146
153
  module Errors
147
154
  class BaseError < StandardError; end
148
155
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.7
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-06-20 00:00:00.000000000 Z
11
+ date: 2023-06-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
54
  version: 0.0.5
55
+ - !ruby/object:Gem::Dependency
56
+ name: json-schema
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 4.0.0
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 4.0.0
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: dotenv-rails
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -114,14 +128,14 @@ dependencies:
114
128
  requirements:
115
129
  - - "~>"
116
130
  - !ruby/object:Gem::Version
117
- version: 0.2.0
131
+ version: 0.2.1
118
132
  type: :development
119
133
  prerelease: false
120
134
  version_requirements: !ruby/object:Gem::Requirement
121
135
  requirements:
122
136
  - - "~>"
123
137
  - !ruby/object:Gem::Version
124
- version: 0.2.0
138
+ version: 0.2.1
125
139
  - !ruby/object:Gem::Dependency
126
140
  name: chroma-db
127
141
  requirement: !ruby/object:Gem::Requirement
@@ -142,14 +156,14 @@ dependencies:
142
156
  requirements:
143
157
  - - "~>"
144
158
  - !ruby/object:Gem::Version
145
- version: 0.9.4
159
+ version: 0.9.5
146
160
  type: :development
147
161
  prerelease: false
148
162
  version_requirements: !ruby/object:Gem::Requirement
149
163
  requirements:
150
164
  - - "~>"
151
165
  - !ruby/object:Gem::Version
152
- version: 0.9.4
166
+ version: 0.9.5
153
167
  - !ruby/object:Gem::Dependency
154
168
  name: docx
155
169
  requirement: !ruby/object:Gem::Requirement
@@ -184,14 +198,14 @@ dependencies:
184
198
  requirements:
185
199
  - - "~>"
186
200
  - !ruby/object:Gem::Version
187
- version: 0.1.1
201
+ version: 0.1.2
188
202
  type: :development
189
203
  prerelease: false
190
204
  version_requirements: !ruby/object:Gem::Requirement
191
205
  requirements:
192
206
  - - "~>"
193
207
  - !ruby/object:Gem::Version
194
- version: 0.1.1
208
+ version: 0.1.2
195
209
  - !ruby/object:Gem::Dependency
196
210
  name: google_search_results
197
211
  requirement: !ruby/object:Gem::Requirement
@@ -462,6 +476,7 @@ files:
462
476
  - Rakefile
463
477
  - examples/create_and_manage_few_shot_prompt_templates.rb
464
478
  - examples/create_and_manage_prompt_templates.rb
479
+ - examples/create_and_manage_prompt_templates_using_structured_output_parser.rb
465
480
  - examples/pdf_store_and_query_with_chroma.rb
466
481
  - examples/store_and_query_with_pinecone.rb
467
482
  - examples/store_and_query_with_qdrant.rb
@@ -469,8 +484,8 @@ files:
469
484
  - lefthook.yml
470
485
  - lib/langchain.rb
471
486
  - lib/langchain/agent/base.rb
472
- - lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb
473
- - lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.yaml
487
+ - lib/langchain/agent/react_agent/react_agent.rb
488
+ - lib/langchain/agent/react_agent/react_agent_prompt.yaml
474
489
  - lib/langchain/agent/sql_query_agent/sql_query_agent.rb
475
490
  - lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml
476
491
  - lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml
@@ -489,6 +504,8 @@ files:
489
504
  - lib/langchain/llm/prompts/summarize_template.yaml
490
505
  - lib/langchain/llm/replicate.rb
491
506
  - lib/langchain/loader.rb
507
+ - lib/langchain/output_parsers/base.rb
508
+ - lib/langchain/output_parsers/structured.rb
492
509
  - lib/langchain/processors/base.rb
493
510
  - lib/langchain/processors/csv.rb
494
511
  - lib/langchain/processors/docx.rb
@@ -509,7 +526,9 @@ files:
509
526
  - lib/langchain/tool/ruby_code_interpreter.rb
510
527
  - lib/langchain/tool/weather.rb
511
528
  - lib/langchain/tool/wikipedia.rb
529
+ - lib/langchain/utils/token_length/ai21_validator.rb
512
530
  - lib/langchain/utils/token_length/base_validator.rb
531
+ - lib/langchain/utils/token_length/cohere_validator.rb
513
532
  - lib/langchain/utils/token_length/google_palm_validator.rb
514
533
  - lib/langchain/utils/token_length/openai_validator.rb
515
534
  - lib/langchain/utils/token_length/token_limit_exceeded.rb