langchainrb 0.6.4 → 0.6.6

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
- metadata.gz: 78810f63a496c6b98208a9c838cbdae41a8c944879e68f16fc4362de90c49110
- data.tar.gz: c95d357da62c8120a2a105a94b219ca1f3552f85fff30bb7cb3d40def336baeb
+ metadata.gz: a9949f3ffd0338c90274f13b9862b0a6b9ec7b717b14b7ccaa8b6b8e0115f621
+ data.tar.gz: 43ebcb26d51b286278d5098ba50defef0c8bd1a897fa744c4519cfa10bdfdf58
  SHA512:
- metadata.gz: ee0c549ecebd98ce940b6dc05c8aa2783c265d7cb3903ca30448be0f906e89f353e419b2bb862178fe9081baa002b42fd7aaf88ec244a63beec9bc862e3a9410
- data.tar.gz: a4b67c5b0d268d6b96622209fe3201c8585bf44d1d44dca0bc061de3f1ba1797e87df61111ddc6565b0d75b23a06677aa3bad6e41fbd4a119ff69f6b11e756ee
+ metadata.gz: c95f6e104aaa9a8dab30c9e78e342fdf960ccfef332a2737218f3cc186521369e6f03216d5ccd08329d5110cd15ef10e10a3f460caecc02dd50e32b1b60ff8b3
+ data.tar.gz: c8c059c760b361975ea7ba8eb8a7aa24c1dd7dde5264d7d8bdf20da4f7ec80fe3f1cf4f60dd16dd8028638f3335b1e1632b655ae6c4bdd01912d33371892b5a3
data/.env.example CHANGED
@@ -1,10 +1,13 @@
  AI21_API_KEY=
  CHROMA_URL=
  COHERE_API_KEY=
+ GOOGLE_PALM_API_KEY=
  HUGGING_FACE_API_KEY=
+ LLAMACPP_MODEL_PATH=
+ LLAMACPP_N_THREADS=
+ LLAMACPP_N_GPU_LAYERS=
  MILVUS_URL=
  OPENAI_API_KEY=
- GOOGLE_PALM_API_KEY=
  OPEN_WEATHER_API_KEY=
  PINECONE_API_KEY=
  PINECONE_ENVIRONMENT=
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
  ## [Unreleased]

+ ## [0.6.6] - 2023-07-13
+ - Langchain::Chunker::RecursiveText
+ - Fixes
+
+ ## [0.6.5] - 2023-07-06
+ - 🗣️ LLMs
+   - Introducing Llama.cpp support
+ - Langchain::OutputParsers::OutputFixingParser to wrap a Langchain::OutputParser and handle invalid response
+
  ## [0.6.4] - 2023-07-01
  - Fix `Langchain::Vectorsearch::Qdrant#add_texts()`
  - Introduce `ConversationMemory`
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
    remote: .
    specs:
-     langchainrb (0.6.4)
+     langchainrb (0.6.6)
        baran (~> 0.1.6)
        colorize (~> 0.8.1)
        json-schema (~> 4.0.0)
@@ -153,6 +153,7 @@ GEM
      addressable (>= 2.8)
    language_server-protocol (3.17.0.3)
    lint_roller (1.0.0)
+   llama_cpp (0.3.0)
    loofah (2.21.1)
      crass (~> 1.0.2)
      nokogiri (>= 1.5.9)
@@ -327,6 +328,7 @@ DEPENDENCIES
  hnswlib (~> 0.8.1)
  hugging-face (~> 0.3.4)
  langchainrb!
+ llama_cpp
  milvus (~> 0.9.0)
  nokogiri (~> 1.13)
  open-weather-ruby-client (~> 0.3.0)
data/README.md CHANGED
@@ -39,7 +39,7 @@ require "langchain"
  | [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
  | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
  | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
- | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
+ | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
  | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
  | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |

@@ -54,7 +54,7 @@ Pick the vector search database you'll be using and instantiate the client:
  client = Langchain::Vectorsearch::Weaviate.new(
    url: ENV["WEAVIATE_URL"],
    api_key: ENV["WEAVIATE_API_KEY"],
-   index: "",
+   index_name: "",
    llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
  )

@@ -274,6 +274,106 @@ prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/promp
  prompt.input_variables #=> ["adjective", "content"]
  ```

+ ### Using Output Parsers
+
+ Parse LLM text responses into structured output, such as JSON.
+
+ #### Structured Output Parser
+
+ You can use the `StructuredOutputParser` to generate a prompt that instructs the LLM to provide a JSON response adhering to a specific JSON schema:
+
+ ```ruby
+ json_schema = {
+   type: "object",
+   properties: {
+     name: {
+       type: "string",
+       description: "Persons name"
+     },
+     age: {
+       type: "number",
+       description: "Persons age"
+     },
+     interests: {
+       type: "array",
+       items: {
+         type: "object",
+         properties: {
+           interest: {
+             type: "string",
+             description: "A topic of interest"
+           },
+           levelOfInterest: {
+             type: "number",
+             description: "A value between 0 and 100 of how interested the person is in this interest"
+           }
+         },
+         required: ["interest", "levelOfInterest"],
+         additionalProperties: false
+       },
+       minItems: 1,
+       maxItems: 3,
+       description: "A list of the person's interests"
+     }
+   },
+   required: ["name", "age", "interests"],
+   additionalProperties: false
+ }
+ parser = Langchain::OutputParsers::StructuredOutputParser.from_json_schema(json_schema)
+ prompt = Langchain::Prompt::PromptTemplate.new(template: "Generate details of a fictional character.\n{format_instructions}\nCharacter description: {description}", input_variables: ["description", "format_instructions"])
+ prompt_text = prompt.format(description: "Korean chemistry student", format_instructions: parser.get_format_instructions)
+ # Generate details of a fictional character.
+ # You must format your output as a JSON value that adheres to a given "JSON Schema" instance.
+ # ...
+ ```
+
+ Then parse the LLM response:
+
+ ```ruby
+ llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
+ llm_response = llm.chat(prompt: prompt_text)
+ parser.parse(llm_response)
+ # {
+ #   "name" => "Kim Ji-hyun",
+ #   "age" => 22,
+ #   "interests" => [
+ #     {
+ #       "interest" => "Organic Chemistry",
+ #       "levelOfInterest" => 85
+ #     },
+ #     ...
+ #   ]
+ # }
+ ```
+
+ If the parser fails to parse the LLM response, you can use the `OutputFixingParser`. It sends an error message, prior output, and the original prompt text to the LLM, asking for a "fixed" response:
+
+ ```ruby
+ begin
+   parser.parse(llm_response)
+ rescue Langchain::OutputParsers::OutputParserException => e
+   fix_parser = Langchain::OutputParsers::OutputFixingParser.from_llm(
+     llm: llm,
+     parser: parser
+   )
+   fix_parser.parse(llm_response)
+ end
+ ```
+
+ Alternatively, if you don't need to handle the `OutputParserException`, you can simplify the code:
+
+ ```ruby
+ # we already have the `OutputFixingParser`:
+ # parser = Langchain::OutputParsers::StructuredOutputParser.from_json_schema(json_schema)
+ fix_parser = Langchain::OutputParsers::OutputFixingParser.from_llm(
+   llm: llm,
+   parser: parser
+ )
+ fix_parser.parse(llm_response)
+ ```
+
+ See [here](https://github.com/andreibondarev/langchainrb/tree/main/examples/create_and_manage_prompt_templates_using_structured_output_parser.rb) for a concrete example.
+
  ### Using Agents 🤖
  Agents are semi-autonomous bots that can respond to user questions and use available to them Tools to provide informed replies. They break down problems into series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer it responds with it.

@@ -327,7 +427,7 @@ agent.run(question: "How many users have a name with length greater than 5 in th
  | "database" | Useful for querying a SQL database | | `gem "sequel", "~> 5.68.0"` |
  | "ruby_code_interpreter" | Interprets Ruby expressions | | `gem "safe_ruby", "~> 1.0.4"` |
  | "google_search" | A wrapper around Google Search | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key) | `gem "google_search_results", "~> 2.0.0"` |
- | "weather" | Calls Open Weather API to retrieve the current weather | `ENV["OPEN_WEATHER_API_KEY]` (https://home.openweathermap.org/api_keys) | `gem "open-weather-ruby-client", "~> 0.3.0"` |
+ | "weather" | Calls Open Weather API to retrieve the current weather | `ENV["OPEN_WEATHER_API_KEY"]` (https://home.openweathermap.org/api_keys) | `gem "open-weather-ruby-client", "~> 0.3.0"` |
  | "wikipedia" | Calls Wikipedia API to retrieve the summary | | `gem "wikipedia-client", "~> 1.17.0"` |

  #### Loaders 🚚
data/examples/create_and_manage_prompt_templates_using_structured_output_parser.rb CHANGED
@@ -58,6 +58,11 @@ prompt.format(description: "Korean chemistry student", format_instructions: pars

  # Character description: Korean chemistry student

+ llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
+ # llm_response = llm.chat(
+ #   prompt: prompt.format(description: "Korean chemistry student", format_instructions: parser.get_format_instructions)
+ # )
+
  # LLM example response:
  llm_example_response = <<~RESPONSE
    Here is your character:
@@ -83,7 +88,14 @@ llm_example_response = <<~RESPONSE
    ```
  RESPONSE

- parser.parse(llm_example_response)
+ fix_parser = Langchain::OutputParsers::OutputFixingParser.from_llm(
+   llm: llm,
+   parser: parser
+ )
+ # The OutputFixingParser wraps the StructuredOutputParser so that, if the
+ # initial LLM response does not conform to the schema, it calls out to the
+ # LLM to fix the error
+ fix_parser.parse(llm_example_response)
  # {
  #   "name" => "Kim Ji-hyun",
  #   "age" => 22,
data/examples/llama_cpp.rb ADDED
@@ -0,0 +1,24 @@
+ require "langchain"
+
+ llm = Langchain::LLM::LlamaCpp.new(
+   model_path: ENV["LLAMACPP_MODEL_PATH"],
+   n_gpu_layers: Integer(ENV["LLAMACPP_N_GPU_LAYERS"]),
+   n_threads: Integer(ENV["LLAMACPP_N_THREADS"])
+ )
+
+ instructions = [
+   "Tell me about the creator of Ruby",
+   "Write a story about a pony who goes to the store to buy some apples."
+ ]
+
+ template = Langchain::Prompt::PromptTemplate.new(
+   template: "{instruction}\n\n### Response:",
+   input_variables: %w[instruction]
+ )
+
+ instructions.each do |instruction|
+   puts "USER: #{instruction}"
+   prompt = template.format(instruction: instruction)
+   response = llm.complete prompt: prompt, n_predict: 1024
+   puts "ASSISTANT: #{response}"
+ end
data/lib/langchain/chunker/recursive_text.rb ADDED
@@ -0,0 +1,38 @@
+ # frozen_string_literal: true
+
+ require "baran"
+
+ module Langchain
+   module Chunker
+     #
+     # Recursive text chunker. Preferentially splits on separators.
+     #
+     # Usage:
+     #     Langchain::Chunker::RecursiveText.new(text).chunks
+     #
+     class RecursiveText < Base
+       attr_reader :text, :chunk_size, :chunk_overlap, :separators
+
+       # @param [String] text
+       # @param [Integer] chunk_size
+       # @param [Integer] chunk_overlap
+       # @param [Array<String>] separators
+       def initialize(text, chunk_size: 1000, chunk_overlap: 200, separators: ["\n\n"])
+         @text = text
+         @chunk_size = chunk_size
+         @chunk_overlap = chunk_overlap
+         @separators = separators
+       end
+
+       # @return [Array<String>]
+       def chunks
+         splitter = Baran::RecursiveCharacterTextSplitter.new(
+           chunk_size: chunk_size,
+           chunk_overlap: chunk_overlap,
+           separators: separators
+         )
+         splitter.chunks(text)
+       end
+     end
+   end
+ end
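For context, a minimal usage sketch of the new chunker. The input text, chunk sizes, and the extra `". "` separator below are illustrative assumptions, not gem defaults; note that `#chunks` delegates to Baran's splitter, so the exact shape of each returned entry comes from the `baran` gem.

```ruby
require "langchain"

text = <<~TEXT
  Ruby was created by Yukihiro Matsumoto.

  It was first released in 1995. Matz designed it for programmer happiness.
TEXT

# Prefer paragraph breaks, then fall back to sentence breaks (order matters).
chunker = Langchain::Chunker::RecursiveText.new(
  text,
  chunk_size: 80,
  chunk_overlap: 0,
  separators: ["\n\n", ". "]
)

chunker.chunks.each { |chunk| puts chunk.inspect }
```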
data/lib/langchain/llm/base.rb CHANGED
@@ -11,6 +11,7 @@ module Langchain::LLM
  # - {Langchain::LLM::Cohere}
  # - {Langchain::LLM::GooglePalm}
  # - {Langchain::LLM::HuggingFace}
+ # - {Langchain::LLM::LlamaCpp}
  # - {Langchain::LLM::OpenAI}
  # - {Langchain::LLM::Replicate}
  #
data/lib/langchain/llm/llama_cpp.rb ADDED
@@ -0,0 +1,102 @@
+ # frozen_string_literal: true
+
+ module Langchain::LLM
+   # A wrapper around the LlamaCpp.rb library
+   #
+   # Gem requirements:
+   #     gem "llama_cpp"
+   #
+   # Usage:
+   #     llama = Langchain::LLM::LlamaCpp.new(
+   #       model_path: ENV["LLAMACPP_MODEL_PATH"],
+   #       n_gpu_layers: Integer(ENV["LLAMACPP_N_GPU_LAYERS"]),
+   #       n_threads: Integer(ENV["LLAMACPP_N_THREADS"])
+   #     )
+   #
+   class LlamaCpp < Base
+     attr_accessor :model_path, :n_gpu_layers, :n_ctx, :seed
+     attr_writer :n_threads
+
+     # @param model_path [String] The path to the model to use
+     # @param n_gpu_layers [Integer] The number of GPU layers to use
+     # @param n_ctx [Integer] The number of context tokens to use
+     # @param n_threads [Integer] The number of CPU threads to use
+     # @param seed [Integer] The seed to use
+     def initialize(model_path:, n_gpu_layers: 1, n_ctx: 2048, n_threads: 1, seed: -1)
+       depends_on "llama_cpp"
+       require "llama_cpp"
+
+       @model_path = model_path
+       @n_gpu_layers = n_gpu_layers
+       @n_ctx = n_ctx
+       @n_threads = n_threads
+       @seed = seed
+     end
+
+     # @param text [String] The text to embed
+     # @param n_threads [Integer] The number of CPU threads to use
+     # @return [Array] The embedding
+     def embed(text:, n_threads: nil)
+       # contexts are kinda stateful when it comes to embeddings, so allocate one each time
+       context = embedding_context
+
+       embedding_input = context.tokenize(text: text, add_bos: true)
+       return unless embedding_input.size.positive?
+
+       n_threads ||= self.n_threads
+
+       context.eval(tokens: embedding_input, n_past: 0, n_threads: n_threads)
+       context.embeddings
+     end
+
+     # @param prompt [String] The prompt to complete
+     # @param n_predict [Integer] The number of tokens to predict
+     # @param n_threads [Integer] The number of CPU threads to use
+     # @return [String] The completed prompt
+     def complete(prompt:, n_predict: 128, n_threads: nil)
+       n_threads ||= self.n_threads
+       # contexts do not appear to be stateful when it comes to completion, so re-use the same one
+       context = completion_context
+       ::LLaMACpp.generate(context, prompt, n_threads: n_threads, n_predict: n_predict)
+     end
+
+     private
+
+     def n_threads
+       # Use the maximum number of CPU threads available, if not configured
+       @n_threads ||= `sysctl -n hw.ncpu`.strip.to_i
+     end
+
+     def build_context_params(embeddings: false)
+       context_params = ::LLaMACpp::ContextParams.new
+
+       context_params.seed = seed
+       context_params.n_ctx = n_ctx
+       context_params.n_gpu_layers = n_gpu_layers
+       context_params.embedding = embeddings
+
+       context_params
+     end
+
+     def build_model(embeddings: false)
+       return @model if defined?(@model)
+       @model = ::LLaMACpp::Model.new(model_path: model_path, params: build_context_params(embeddings: embeddings))
+     end
+
+     def build_completion_context
+       ::LLaMACpp::Context.new(model: build_model)
+     end
+
+     def build_embedding_context
+       ::LLaMACpp::Context.new(model: build_model(embeddings: true))
+     end
+
+     def completion_context
+       @completion_context ||= build_completion_context
+     end
+
+     def embedding_context
+       @embedding_context ||= build_embedding_context
+     end
+   end
+ end
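A quick sketch of driving this wrapper directly, beyond the bundled example. The env var name follows data/.env.example; the thread count and whether your local llama_cpp build can load the model at `LLAMACPP_MODEL_PATH` are assumptions about your setup.

```ruby
require "langchain"

llm = Langchain::LLM::LlamaCpp.new(
  model_path: ENV["LLAMACPP_MODEL_PATH"], # local model file, per data/.env.example
  n_ctx: 2048,                            # the default context size shown above
  n_threads: 4                            # skips the sysctl-based CPU-count fallback
)

# Embedding path: tokenizes the text, evals it, and returns the context's embeddings.
vector = llm.embed(text: "Ruby was created by Yukihiro Matsumoto.")
puts vector&.length

# Completion path: generates up to n_predict tokens for the prompt.
puts llm.complete(prompt: "Tell me about the creator of Ruby.", n_predict: 256)
```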
data/lib/langchain/output_parsers/fix.rb ADDED
@@ -0,0 +1,84 @@
+ # frozen_string_literal: true
+
+ module Langchain::OutputParsers
+   # = Output Fixing Parser
+   #
+   class OutputFixingParser < Base
+     attr_reader :llm, :parser, :prompt
+
+     #
+     # Initializes a new instance of the class.
+     #
+     # @param llm [Langchain::LLM] The LLM used in the fixing process
+     # @param parser [Langchain::OutputParsers] The parser originally used, which resulted in the parsing error
+     # @param prompt [Langchain::Prompt::PromptTemplate]
+     #
+     def initialize(llm:, parser:, prompt:)
+       raise ArgumentError.new("llm must be an instance of Langchain::LLM got: #{llm.class}") unless llm.is_a?(Langchain::LLM::Base)
+       raise ArgumentError.new("parser must be an instance of Langchain::OutputParsers got #{parser.class}") unless parser.is_a?(Langchain::OutputParsers::Base)
+       raise ArgumentError.new("prompt must be an instance of Langchain::Prompt::PromptTemplate got #{prompt.class}") unless prompt.is_a?(Langchain::Prompt::PromptTemplate)
+       @llm = llm
+       @parser = parser
+       @prompt = prompt
+     end
+
+     def to_h
+       {
+         _type: "OutputFixingParser",
+         parser: parser.to_h,
+         prompt: prompt.to_h
+       }
+     end
+
+     #
+     # Calls get_format_instructions on the @parser
+     #
+     # @return [String] Instructions for how the output of a language model should be formatted
+     #   according to the @schema.
+     #
+     def get_format_instructions
+       parser.get_format_instructions
+     end
+
+     #
+     # Parse the output of an LLM call; if it fails with OutputParserException,
+     # call the LLM with a fix prompt in an attempt to get a correctly
+     # formatted response
+     #
+     # @param completion [String] Text output from the LLM call
+     #
+     # @return [Object] object that is successfully parsed by @parser.parse
+     #
+     def parse(completion)
+       parser.parse(completion)
+     rescue OutputParserException => e
+       new_completion = llm.chat(
+         prompt: prompt.format(
+           instructions: parser.get_format_instructions,
+           completion: completion,
+           error: e
+         )
+       )
+       parser.parse(new_completion)
+     end
+
+     #
+     # Creates a new instance of the class using the given LLM and parser.
+     #
+     # @param llm [Langchain::LLM] The LLM used in the fixing process
+     # @param parser [Langchain::OutputParsers] The parser to wrap
+     # @param prompt [Langchain::Prompt::PromptTemplate] Optional fix prompt; defaults to the naive fix prompt
+     #
+     # @return [Object] A new instance of the class
+     #
+     def self.from_llm(llm:, parser:, prompt: nil)
+       new(llm: llm, parser: parser, prompt: prompt || naive_fix_prompt)
+     end
+
+     private
+
+     private_class_method def self.naive_fix_prompt
+       Langchain::Prompt.load_from_path(
+         file_path: Langchain.root.join("langchain/output_parsers/prompts/naive_fix_prompt.yaml")
+       )
+     end
+   end
+ end
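Beyond the README examples, `from_llm` also accepts a custom fix prompt in place of the bundled naive_fix_prompt.yaml. A hedged sketch follows; the schema and template wording are illustrative, but any custom template must expose the `instructions`, `completion`, and `error` input variables that `#parse` formats.

```ruby
require "langchain"

llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])

# A tiny schema, just to have a parser to wrap (illustrative only).
parser = Langchain::OutputParsers::StructuredOutputParser.from_json_schema(
  {type: "object", properties: {name: {type: "string"}}, required: ["name"]}
)

# Custom fix prompt; same three input variables as the bundled YAML template.
fix_prompt = Langchain::Prompt::PromptTemplate.new(
  template: "Instructions:\n{instructions}\n\nBad completion:\n{completion}\n\nError:\n{error}\n\nRespond only with corrected output:",
  input_variables: ["instructions", "completion", "error"]
)

fix_parser = Langchain::OutputParsers::OutputFixingParser.from_llm(
  llm: llm,
  parser: parser,
  prompt: fix_prompt
)

# On OutputParserException, #parse re-asks the LLM using fix_prompt.
fix_parser.parse('{"name": missing quotes}')
```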
data/lib/langchain/output_parsers/prompts/naive_fix_prompt.yaml ADDED
@@ -0,0 +1,22 @@
+ _type: prompt
+ input_variables:
+   - instructions
+   - completion
+   - error
+ template: |
+   Instructions:
+   --------------
+   {instructions}
+   --------------
+   Completion:
+   --------------
+   {completion}
+   --------------
+
+   Above, the Completion did not satisfy the constraints given in the Instructions.
+   Error:
+   --------------
+   {error}
+   --------------
+
+   Please try again. Please only respond with an answer that satisfies the constraints laid out in the Instructions:
data/lib/langchain/vectorsearch/base.rb CHANGED
@@ -161,12 +161,16 @@ module Langchain::Vectorsearch
    end

    def add_data(paths:)
-     raise ArgumentError, "Paths must be provided" if paths.to_a.empty?
+     raise ArgumentError, "Paths must be provided" if Array(paths).empty?

      texts = Array(paths)
        .flatten
-       .map { |path| Langchain::Loader.new(path)&.load&.value }
-       .compact
+       .map do |path|
+         data = Langchain::Loader.new(path)&.load&.chunks
+         data.map { |chunk| chunk[:text] }
+       end
+
+     texts.flatten!

      add_texts(texts: texts)
    end
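The reworked `add_data` now maps loader output through `#chunks` and keeps only each chunk's `:text` value before embedding. A short sketch of calling it; the `client` stands in for any vector search client (such as the Weaviate client from the README), and the paths are placeholders.

```ruby
# Each file is loaded, split into chunks, reduced to the chunk :text values,
# flattened, and handed to add_texts.
client.add_data(paths: ["docs/guide.pdf", "docs/faq.md"])
```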
data/lib/langchain/vectorsearch/pgvector.rb CHANGED
@@ -40,20 +40,53 @@ module Langchain::Vectorsearch
      super(llm: llm)
    end

-   # Add a list of texts to the index
+   # Upsert a list of texts to the index
    # @param texts [Array<String>] The texts to add to the index
-   # @return [PG::Result] The response from the database
-   def add_texts(texts:)
-     data = texts.flat_map do |text|
-       [text, llm.embed(text: text)]
+   # @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
+   # @return [PG::Result] The response from the database including the ids of
+   #   the added or updated texts.
+   def upsert_texts(texts:, ids:)
+     data = texts.zip(ids).flat_map do |(text, id)|
+       [id, text, llm.embed(text: text)]
      end
-     values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
+     values = texts.length.times.map { |i| "($#{3 * i + 1}, $#{3 * i + 2}, $#{3 * i + 3})" }.join(",")
+     # see https://github.com/pgvector/pgvector#storing
      client.exec_params(
-       "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values};",
+       "INSERT INTO #{quoted_table_name} (id, content, vectors) VALUES
+       #{values} ON CONFLICT (id) DO UPDATE SET content = EXCLUDED.content, vectors = EXCLUDED.vectors RETURNING id;",
        data
      )
    end

+   # Add a list of texts to the index
+   # @param texts [Array<String>] The texts to add to the index
+   # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
+   # @return [PG::Result] The response from the database including the ids of
+   #   the added texts.
+   def add_texts(texts:, ids: nil)
+     if ids.nil? || ids.empty?
+       data = texts.flat_map do |text|
+         [text, llm.embed(text: text)]
+       end
+       values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
+       client.exec_params(
+         "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values} RETURNING id;",
+         data
+       )
+     else
+       upsert_texts(texts: texts, ids: ids)
+     end
+   end
+
+   # Update a list of ids and corresponding texts in the index
+   # @param texts [Array<String>] The texts to add to the index
+   # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
+   # @return [PG::Result] The response from the database including the ids of
+   #   the updated texts.
+   def update_texts(texts:, ids:)
+     upsert_texts(texts: texts, ids: ids)
+   end
+
    # Create default schema
    # @return [PG::Result] The response from the database
    def create_default_schema
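A sketch of the new id-aware Pgvector flow. Only the method names and keyword arguments come from the diff above; the connection string env var and table name are assumptions.

```ruby
require "langchain"

client = Langchain::Vectorsearch::Pgvector.new(
  url: ENV["POSTGRES_URL"], # assumed env var for the connection string
  index_name: "documents",
  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
)

# Without ids: a plain INSERT, which now RETURNS the generated ids.
client.add_texts(texts: ["Hello world"])

# With ids: routed to upsert_texts, i.e. INSERT ... ON CONFLICT (id) DO UPDATE.
client.add_texts(texts: ["Hello again"], ids: [42])

# update_texts is a thin alias over the same upsert path.
client.update_texts(texts: ["Hello, updated"], ids: [42])
```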
data/lib/langchain/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true

  module Langchain
-   VERSION = "0.6.4"
+   VERSION = "0.6.6"
  end
data/lib/langchain.rb CHANGED
@@ -82,6 +82,7 @@ module Langchain
    module Chunker
      autoload :Base, "langchain/chunker/base"
      autoload :Text, "langchain/chunker/text"
+     autoload :RecursiveText, "langchain/chunker/recursive_text"
    end

    module Tool
@@ -134,6 +135,7 @@ module Langchain
      autoload :Cohere, "langchain/llm/cohere"
      autoload :GooglePalm, "langchain/llm/google_palm"
      autoload :HuggingFace, "langchain/llm/hugging_face"
+     autoload :LlamaCpp, "langchain/llm/llama_cpp"
      autoload :OpenAI, "langchain/llm/openai"
      autoload :Replicate, "langchain/llm/replicate"
    end
@@ -153,6 +155,7 @@ module Langchain
    module OutputParsers
      autoload :Base, "langchain/output_parsers/base"
      autoload :StructuredOutputParser, "langchain/output_parsers/structured"
+     autoload :OutputFixingParser, "langchain/output_parsers/fix"
    end

    module Errors
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: langchainrb
  version: !ruby/object:Gem::Version
-   version: 0.6.4
+   version: 0.6.6
  platform: ruby
  authors:
  - Andrei Bondarev
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2023-07-01 00:00:00.000000000 Z
+ date: 2023-07-14 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: baran
@@ -262,6 +262,20 @@ dependencies:
      - - "~>"
        - !ruby/object:Gem::Version
          version: 0.9.0
+ - !ruby/object:Gem::Dependency
+   name: llama_cpp
+   requirement: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
+   type: :development
+   prerelease: false
+   version_requirements: !ruby/object:Gem::Requirement
+     requirements:
+     - - ">="
+       - !ruby/object:Gem::Version
+         version: '0'
  - !ruby/object:Gem::Dependency
    name: nokogiri
    requirement: !ruby/object:Gem::Requirement
@@ -478,6 +492,7 @@ files:
  - examples/create_and_manage_few_shot_prompt_templates.rb
  - examples/create_and_manage_prompt_templates.rb
  - examples/create_and_manage_prompt_templates_using_structured_output_parser.rb
+ - examples/llama_cpp.rb
  - examples/pdf_store_and_query_with_chroma.rb
  - examples/store_and_query_with_pinecone.rb
  - examples/store_and_query_with_qdrant.rb
@@ -492,6 +507,7 @@ files:
  - lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml
  - lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml
  - lib/langchain/chunker/base.rb
+ - lib/langchain/chunker/recursive_text.rb
  - lib/langchain/chunker/text.rb
  - lib/langchain/contextual_logger.rb
  - lib/langchain/conversation.rb
@@ -503,11 +519,14 @@ files:
  - lib/langchain/llm/cohere.rb
  - lib/langchain/llm/google_palm.rb
  - lib/langchain/llm/hugging_face.rb
+ - lib/langchain/llm/llama_cpp.rb
  - lib/langchain/llm/openai.rb
  - lib/langchain/llm/prompts/summarize_template.yaml
  - lib/langchain/llm/replicate.rb
  - lib/langchain/loader.rb
  - lib/langchain/output_parsers/base.rb
+ - lib/langchain/output_parsers/fix.rb
+ - lib/langchain/output_parsers/prompts/naive_fix_prompt.yaml
  - lib/langchain/output_parsers/structured.rb
  - lib/langchain/processors/base.rb
  - lib/langchain/processors/csv.rb