langchainrb 0.6.5 → 0.6.6

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 3404535e036c3efe68fd12706d2ebb269caed87b562fc38434122b1be01a356d
-   data.tar.gz: e3be77b32cf754235e8895fb1af60edca54cb5acb84278bfa2e39b6ed7c2abbe
+   metadata.gz: a9949f3ffd0338c90274f13b9862b0a6b9ec7b717b14b7ccaa8b6b8e0115f621
+   data.tar.gz: 43ebcb26d51b286278d5098ba50defef0c8bd1a897fa744c4519cfa10bdfdf58
  SHA512:
-   metadata.gz: b3fae04c73176c758c2d2d32c3ac538f3e094eb10f378b9a8befbbdcc62b60e55941a1bfefcb61eac7daca43ef91d0e57306dbc26bd59afbdad6ab4efff2ba89
-   data.tar.gz: 626bb4a226112ee6fe709077a6d49ba91c0483fee657848153e9cff61693183709aede5844237c24cc02c561f59be82ea1fd296fe1c3f4ee4d971494ee4dcd75
+   metadata.gz: c95f6e104aaa9a8dab30c9e78e342fdf960ccfef332a2737218f3cc186521369e6f03216d5ccd08329d5110cd15ef10e10a3f460caecc02dd50e32b1b60ff8b3
+   data.tar.gz: c8c059c760b361975ea7ba8eb8a7aa24c1dd7dde5264d7d8bdf20da4f7ec80fe3f1cf4f60dd16dd8028638f3335b1e1632b655ae6c4bdd01912d33371892b5a3
data/CHANGELOG.md CHANGED
@@ -1,5 +1,14 @@
  ## [Unreleased]

+ ## [0.6.6] - 2023-07-13
+ - Langchain::Chunker::RecursiveText
+ - Fixes
+
+ ## [0.6.5] - 2023-07-06
+ - 🗣️ LLMs
+   - Introducing Llama.cpp support
+ - Langchain::OutputParsers::OutputFixingParser to wrap a Langchain::OutputParser and handle invalid response
+
  ## [0.6.4] - 2023-07-01
  - Fix `Langchain::Vectorsearch::Qdrant#add_texts()`
  - Introduce `ConversationMemory`
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
  PATH
    remote: .
    specs:
-     langchainrb (0.6.5)
+     langchainrb (0.6.6)
        baran (~> 0.1.6)
        colorize (~> 0.8.1)
        json-schema (~> 4.0.0)
data/README.md CHANGED
@@ -39,7 +39,7 @@ require "langchain"
  | [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
  | [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
  | [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
- | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
+ | [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
  | [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
  | [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |

@@ -54,7 +54,7 @@ Pick the vector search database you'll be using and instantiate the client:
  client = Langchain::Vectorsearch::Weaviate.new(
    url: ENV["WEAVIATE_URL"],
    api_key: ENV["WEAVIATE_API_KEY"],
-   index: "",
+   index_name: "",
    llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
  )

@@ -427,7 +427,7 @@ agent.run(question: "How many users have a name with length greater than 5 in th
  | "database" | Useful for querying a SQL database | | `gem "sequel", "~> 5.68.0"` |
  | "ruby_code_interpreter" | Interprets Ruby expressions | | `gem "safe_ruby", "~> 1.0.4"` |
  | "google_search" | A wrapper around Google Search | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key) | `gem "google_search_results", "~> 2.0.0"` |
- | "weather" | Calls Open Weather API to retrieve the current weather | `ENV["OPEN_WEATHER_API_KEY]` (https://home.openweathermap.org/api_keys) | `gem "open-weather-ruby-client", "~> 0.3.0"` |
+ | "weather" | Calls Open Weather API to retrieve the current weather | `ENV["OPEN_WEATHER_API_KEY"]` (https://home.openweathermap.org/api_keys) | `gem "open-weather-ruby-client", "~> 0.3.0"` |
  | "wikipedia" | Calls Wikipedia API to retrieve the summary | | `gem "wikipedia-client", "~> 1.17.0"` |

  #### Loaders 🚚
data/lib/langchain/chunker/recursive_text.rb ADDED
@@ -0,0 +1,38 @@
+ # frozen_string_literal: true
+
+ require "baran"
+
+ module Langchain
+   module Chunker
+     #
+     # Recursive text chunker. Preferentially splits on separators.
+     #
+     # Usage:
+     #     Langchain::Chunker::RecursiveText.new(text).chunks
+     #
+     class RecursiveText < Base
+       attr_reader :text, :chunk_size, :chunk_overlap, :separators
+
+       # @param [String] text
+       # @param [Integer] chunk_size
+       # @param [Integer] chunk_overlap
+       # @param [Array<String>] separators
+       def initialize(text, chunk_size: 1000, chunk_overlap: 200, separators: ["\n\n"])
+         @text = text
+         @chunk_size = chunk_size
+         @chunk_overlap = chunk_overlap
+         @separators = separators
+       end
+
+       # @return [Array<String>]
+       def chunks
+         splitter = Baran::RecursiveCharacterTextSplitter.new(
+           chunk_size: chunk_size,
+           chunk_overlap: chunk_overlap,
+           separators: separators
+         )
+         splitter.chunks(text)
+       end
+     end
+   end
+ end
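For orientation, here is a minimal usage sketch of the new `Langchain::Chunker::RecursiveText` class added above. The input file, chunk sizes, and separators are illustrative values, not taken from the gem (the class itself defaults to `chunk_size: 1000`, `chunk_overlap: 200`, `separators: ["\n\n"]`):

```ruby
require "langchain"

# Hypothetical input document; any String works.
text = File.read("docs/guide.md")

# Prefer splitting on blank lines, then single newlines, delegating the
# actual splitting to Baran::RecursiveCharacterTextSplitter as shown above.
chunker = Langchain::Chunker::RecursiveText.new(
  text,
  chunk_size: 500,
  chunk_overlap: 50,
  separators: ["\n\n", "\n"]
)

chunks = chunker.chunks
puts chunks.length
```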
data/lib/langchain/vectorsearch/base.rb CHANGED
@@ -161,12 +161,16 @@ module Langchain::Vectorsearch
      end

      def add_data(paths:)
-       raise ArgumentError, "Paths must be provided" if paths.to_a.empty?
+       raise ArgumentError, "Paths must be provided" if Array(paths).empty?

        texts = Array(paths)
          .flatten
-         .map { |path| Langchain::Loader.new(path)&.load&.value }
-         .compact
+         .map do |path|
+           data = Langchain::Loader.new(path)&.load&.chunks
+           data.map { |chunk| chunk[:text] }
+         end
+
+       texts.flatten!

        add_texts(texts: texts)
      end
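With this change, `add_data` loads each path with `Langchain::Loader`, collects the `:text` of every chunk, and flattens the result before embedding. A hedged sketch of a typical call, reusing the Weaviate client pattern from the README above (the index name and file paths are hypothetical):

```ruby
require "langchain"

client = Langchain::Vectorsearch::Weaviate.new(
  url: ENV["WEAVIATE_URL"],
  api_key: ENV["WEAVIATE_API_KEY"],
  index_name: "Documents", # hypothetical index name
  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
)

# Each path is run through Langchain::Loader, the :text of every chunk is
# collected and flattened, and the resulting strings go to add_texts.
client.add_data(paths: ["docs/handbook.pdf", "docs/faq.md"])
```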
data/lib/langchain/vectorsearch/pgvector.rb CHANGED
@@ -40,20 +40,53 @@ module Langchain::Vectorsearch
        super(llm: llm)
      end

-     # Add a list of texts to the index
+     # Upsert a list of texts to the index
      # @param texts [Array<String>] The texts to add to the index
-     # @return [PG::Result] The response from the database
-     def add_texts(texts:)
-       data = texts.flat_map do |text|
-         [text, llm.embed(text: text)]
+     # @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
+     # @return [PG::Result] The response from the database including the ids of
+     #   the added or updated texts.
+     def upsert_texts(texts:, ids:)
+       data = texts.zip(ids).flat_map do |(text, id)|
+         [id, text, llm.embed(text: text)]
        end
-       values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
+       values = texts.length.times.map { |i| "($#{3 * i + 1}, $#{3 * i + 2}, $#{3 * i + 3})" }.join(",")
+       # see https://github.com/pgvector/pgvector#storing
        client.exec_params(
-         "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values};",
+         "INSERT INTO #{quoted_table_name} (id, content, vectors) VALUES
+           #{values} ON CONFLICT (id) DO UPDATE SET content = EXCLUDED.content, vectors = EXCLUDED.vectors RETURNING id;",
          data
        )
      end

+     # Add a list of texts to the index
+     # @param texts [Array<String>] The texts to add to the index
+     # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
+     # @return [PG::Result] The response from the database including the ids of
+     #   the added texts.
+     def add_texts(texts:, ids: nil)
+       if ids.nil? || ids.empty?
+         data = texts.flat_map do |text|
+           [text, llm.embed(text: text)]
+         end
+         values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
+         client.exec_params(
+           "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values} RETURNING id;",
+           data
+         )
+       else
+         upsert_texts(texts: texts, ids: ids)
+       end
+     end
+
+     # Update a list of ids and corresponding texts to the index
+     # @param texts [Array<String>] The texts to add to the index
+     # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
+     # @return [PG::Result] The response from the database including the ids of
+     #   the updated texts.
+     def update_texts(texts:, ids:)
+       upsert_texts(texts: texts, ids: ids)
+     end
+
      # Create default schema
      # @return [PG::Result] The response from the database
      def create_default_schema
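A sketch of how the new id handling might be exercised. Only `add_texts`, `upsert_texts`, and `update_texts` come from this diff; the constructor arguments below are assumptions modelled on the README's client pattern, so check the gem's docs for the exact Pgvector options:

```ruby
require "langchain"

pgvector = Langchain::Vectorsearch::Pgvector.new(
  url: ENV["POSTGRES_URL"],  # assumption: database connection URL
  index_name: "documents",   # assumption: table name
  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
)

# Without ids, rows are inserted as before (the INSERT now includes RETURNING id).
pgvector.add_texts(texts: ["first document", "second document"])

# With ids, the call is routed to upsert_texts, which uses
# ON CONFLICT (id) DO UPDATE to overwrite existing rows.
pgvector.add_texts(texts: ["revised first document"], ids: [1])

# update_texts is a thin wrapper around upsert_texts.
pgvector.update_texts(texts: ["revised second document"], ids: [2])
```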
data/lib/langchain/version.rb CHANGED
@@ -1,5 +1,5 @@
  # frozen_string_literal: true

  module Langchain
-   VERSION = "0.6.5"
+   VERSION = "0.6.6"
  end
data/lib/langchain.rb CHANGED
@@ -82,6 +82,7 @@ module Langchain
    module Chunker
      autoload :Base, "langchain/chunker/base"
      autoload :Text, "langchain/chunker/text"
+     autoload :RecursiveText, "langchain/chunker/recursive_text"
    end

    module Tool
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: langchainrb
  version: !ruby/object:Gem::Version
-   version: 0.6.5
+   version: 0.6.6
  platform: ruby
  authors:
  - Andrei Bondarev
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2023-07-06 00:00:00.000000000 Z
+ date: 2023-07-14 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
    name: baran
@@ -507,6 +507,7 @@ files:
  - lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml
  - lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml
  - lib/langchain/chunker/base.rb
+ - lib/langchain/chunker/recursive_text.rb
  - lib/langchain/chunker/text.rb
  - lib/langchain/contextual_logger.rb
  - lib/langchain/conversation.rb