langchainrb 0.6.5 → 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3404535e036c3efe68fd12706d2ebb269caed87b562fc38434122b1be01a356d
4
- data.tar.gz: e3be77b32cf754235e8895fb1af60edca54cb5acb84278bfa2e39b6ed7c2abbe
3
+ metadata.gz: a9949f3ffd0338c90274f13b9862b0a6b9ec7b717b14b7ccaa8b6b8e0115f621
4
+ data.tar.gz: 43ebcb26d51b286278d5098ba50defef0c8bd1a897fa744c4519cfa10bdfdf58
5
5
  SHA512:
6
- metadata.gz: b3fae04c73176c758c2d2d32c3ac538f3e094eb10f378b9a8befbbdcc62b60e55941a1bfefcb61eac7daca43ef91d0e57306dbc26bd59afbdad6ab4efff2ba89
7
- data.tar.gz: 626bb4a226112ee6fe709077a6d49ba91c0483fee657848153e9cff61693183709aede5844237c24cc02c561f59be82ea1fd296fe1c3f4ee4d971494ee4dcd75
6
+ metadata.gz: c95f6e104aaa9a8dab30c9e78e342fdf960ccfef332a2737218f3cc186521369e6f03216d5ccd08329d5110cd15ef10e10a3f460caecc02dd50e32b1b60ff8b3
7
+ data.tar.gz: c8c059c760b361975ea7ba8eb8a7aa24c1dd7dde5264d7d8bdf20da4f7ec80fe3f1cf4f60dd16dd8028638f3335b1e1632b655ae6c4bdd01912d33371892b5a3
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.6.6] - 2023-07-13
4
+ - Langchain::Chunker::RecursiveText
5
+ - Fixes
6
+
7
+ ## [0.6.5] - 2023-07-06
8
+ - 🗣️ LLMs
9
+ - Introducing Llama.cpp support
10
+ - Langchain::OutputParsers::OutputFixingParser to wrap a Langchain::OutputParser and handle invalid response
11
+
3
12
  ## [0.6.4] - 2023-07-01
4
13
  - Fix `Langchain::Vectorsearch::Qdrant#add_texts()`
5
14
  - Introduce `ConversationMemory`
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.6.5)
4
+ langchainrb (0.6.6)
5
5
  baran (~> 0.1.6)
6
6
  colorize (~> 0.8.1)
7
7
  json-schema (~> 4.0.0)
data/README.md CHANGED
@@ -39,7 +39,7 @@ require "langchain"
39
39
  | [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
40
40
  | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
41
41
  | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
42
- | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
42
+ | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
43
43
  | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
44
44
  | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
45
45
 
@@ -54,7 +54,7 @@ Pick the vector search database you'll be using and instantiate the client:
54
54
  client = Langchain::Vectorsearch::Weaviate.new(
55
55
  url: ENV["WEAVIATE_URL"],
56
56
  api_key: ENV["WEAVIATE_API_KEY"],
57
- index: "",
57
+ index_name: "",
58
58
  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
59
59
  )
60
60
 
@@ -427,7 +427,7 @@ agent.run(question: "How many users have a name with length greater than 5 in th
427
427
  | "database" | Useful for querying a SQL database | | `gem "sequel", "~> 5.68.0"` |
428
428
  | "ruby_code_interpreter" | Interprets Ruby expressions | | `gem "safe_ruby", "~> 1.0.4"` |
429
429
  | "google_search" | A wrapper around Google Search | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key) | `gem "google_search_results", "~> 2.0.0"` |
430
- | "weather" | Calls Open Weather API to retrieve the current weather | `ENV["OPEN_WEATHER_API_KEY]` (https://home.openweathermap.org/api_keys) | `gem "open-weather-ruby-client", "~> 0.3.0"` |
430
+ | "weather" | Calls Open Weather API to retrieve the current weather | `ENV["OPEN_WEATHER_API_KEY"]` (https://home.openweathermap.org/api_keys) | `gem "open-weather-ruby-client", "~> 0.3.0"` |
431
431
  | "wikipedia" | Calls Wikipedia API to retrieve the summary | | `gem "wikipedia-client", "~> 1.17.0"` |
432
432
 
433
433
  #### Loaders 🚚
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "baran"
4
+
5
+ module Langchain
6
+ module Chunker
7
+ #
8
+ # Recursive text chunker. Preferentially splits on separators.
9
+ #
10
+ # Usage:
11
+ # Langchain::Chunker::RecursiveText.new(text).chunks
12
+ #
13
+ class RecursiveText < Base
14
+ attr_reader :text, :chunk_size, :chunk_overlap, :separators
15
+
16
+ # @param [String] text
17
+ # @param [Integer] chunk_size
18
+ # @param [Integer] chunk_overlap
19
+ # @param [Array<String>] separators
20
+ def initialize(text, chunk_size: 1000, chunk_overlap: 200, separators: ["\n\n"])
21
+ @text = text
22
+ @chunk_size = chunk_size
23
+ @chunk_overlap = chunk_overlap
24
+ @separators = separators
25
+ end
26
+
27
+ # @return [Array<String>]
28
+ def chunks
29
+ splitter = Baran::RecursiveCharacterTextSplitter.new(
30
+ chunk_size: chunk_size,
31
+ chunk_overlap: chunk_overlap,
32
+ separators: separators
33
+ )
34
+ splitter.chunks(text)
35
+ end
36
+ end
37
+ end
38
+ end
@@ -161,12 +161,16 @@ module Langchain::Vectorsearch
161
161
  end
162
162
 
163
163
  def add_data(paths:)
164
- raise ArgumentError, "Paths must be provided" if paths.to_a.empty?
164
+ raise ArgumentError, "Paths must be provided" if Array(paths).empty?
165
165
 
166
166
  texts = Array(paths)
167
167
  .flatten
168
- .map { |path| Langchain::Loader.new(path)&.load&.value }
169
- .compact
168
+ .map do |path|
169
+ data = Langchain::Loader.new(path)&.load&.chunks
170
+ data.map { |chunk| chunk[:text] }
171
+ end
172
+
173
+ texts.flatten!
170
174
 
171
175
  add_texts(texts: texts)
172
176
  end
@@ -40,20 +40,53 @@ module Langchain::Vectorsearch
40
40
  super(llm: llm)
41
41
  end
42
42
 
43
- # Add a list of texts to the index
43
+ # Upsert a list of texts to the index
44
44
  # @param texts [Array<String>] The texts to add to the index
45
- # @return [PG::Result] The response from the database
46
- def add_texts(texts:)
47
- data = texts.flat_map do |text|
48
- [text, llm.embed(text: text)]
45
+ # @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
46
+ # @return [PG::Result] The response from the database including the ids of
47
+ # the added or updated texts.
48
+ def upsert_texts(texts:, ids:)
49
+ data = texts.zip(ids).flat_map do |(text, id)|
50
+ [id, text, llm.embed(text: text)]
49
51
  end
50
- values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
52
+ values = texts.length.times.map { |i| "($#{3 * i + 1}, $#{3 * i + 2}, $#{3 * i + 3})" }.join(",")
53
+ # see https://github.com/pgvector/pgvector#storing
51
54
  client.exec_params(
52
- "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values};",
55
+ "INSERT INTO #{quoted_table_name} (id, content, vectors) VALUES
56
+ #{values} ON CONFLICT (id) DO UPDATE SET content = EXCLUDED.content, vectors = EXCLUDED.vectors RETURNING id;",
53
57
  data
54
58
  )
55
59
  end
56
60
 
61
+ # Add a list of texts to the index
62
+ # @param texts [Array<String>] The texts to add to the index
63
+ # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
64
+ # @return [PG::Result] The response from the database including the ids of
65
+ # the added texts.
66
+ def add_texts(texts:, ids: nil)
67
+ if ids.nil? || ids.empty?
68
+ data = texts.flat_map do |text|
69
+ [text, llm.embed(text: text)]
70
+ end
71
+ values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
72
+ client.exec_params(
73
+ "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values} RETURNING id;",
74
+ data
75
+ )
76
+ else
77
+ upsert_texts(texts: texts, ids: ids)
78
+ end
79
+ end
80
+
81
+ # Update the texts stored under the given ids (an id that is not yet in the index is inserted)
82
+ # @param texts [Array<String>] The new texts to store in the index
83
+ # @param ids [Array<String>] The ids of the entries to update, in the same order as the texts
84
+ # @return [PG::Result] The response from the database including the ids of
85
+ # the updated texts.
86
+ def update_texts(texts:, ids:)
87
+ upsert_texts(texts: texts, ids: ids)
88
+ end
89
+
57
90
  # Create default schema
58
91
  # @return [PG::Result] The response from the database
59
92
  def create_default_schema
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.6.5"
4
+ VERSION = "0.6.6"
5
5
  end
data/lib/langchain.rb CHANGED
@@ -82,6 +82,7 @@ module Langchain
82
82
  module Chunker
83
83
  autoload :Base, "langchain/chunker/base"
84
84
  autoload :Text, "langchain/chunker/text"
85
+ autoload :RecursiveText, "langchain/chunker/recursive_text"
85
86
  end
86
87
 
87
88
  module Tool
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.5
4
+ version: 0.6.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-07-06 00:00:00.000000000 Z
11
+ date: 2023-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: baran
@@ -507,6 +507,7 @@ files:
507
507
  - lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml
508
508
  - lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml
509
509
  - lib/langchain/chunker/base.rb
510
+ - lib/langchain/chunker/recursive_text.rb
510
511
  - lib/langchain/chunker/text.rb
511
512
  - lib/langchain/contextual_logger.rb
512
513
  - lib/langchain/conversation.rb