langchainrb 0.6.5 → 0.6.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +1 -1
- data/README.md +3 -3
- data/lib/langchain/chunker/recursive_text.rb +38 -0
- data/lib/langchain/vectorsearch/base.rb +7 -3
- data/lib/langchain/vectorsearch/pgvector.rb +40 -7
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a9949f3ffd0338c90274f13b9862b0a6b9ec7b717b14b7ccaa8b6b8e0115f621
|
|
4
|
+
data.tar.gz: 43ebcb26d51b286278d5098ba50defef0c8bd1a897fa744c4519cfa10bdfdf58
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c95f6e104aaa9a8dab30c9e78e342fdf960ccfef332a2737218f3cc186521369e6f03216d5ccd08329d5110cd15ef10e10a3f460caecc02dd50e32b1b60ff8b3
|
|
7
|
+
data.tar.gz: c8c059c760b361975ea7ba8eb8a7aa24c1dd7dde5264d7d8bdf20da4f7ec80fe3f1cf4f60dd16dd8028638f3335b1e1632b655ae6c4bdd01912d33371892b5a3
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
## [Unreleased]
|
|
2
2
|
|
|
3
|
+
## [0.6.6] - 2023-07-13
|
|
4
|
+
- Langchain::Chunker::RecursiveText
|
|
5
|
+
- Fixes
|
|
6
|
+
|
|
7
|
+
## [0.6.5] - 2023-07-06
|
|
8
|
+
- 🗣️ LLMs
|
|
9
|
+
- Introducing Llama.cpp support
|
|
10
|
+
- Langchain::OutputParsers::OutputFixingParser to wrap a Langchain::OutputParser and handle invalid response
|
|
11
|
+
|
|
3
12
|
## [0.6.4] - 2023-07-01
|
|
4
13
|
- Fix `Langchain::Vectorsearch::Qdrant#add_texts()`
|
|
5
14
|
- Introduce `ConversationMemory`
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
|
@@ -39,7 +39,7 @@ require "langchain"
|
|
|
39
39
|
| [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
|
40
40
|
| [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
|
41
41
|
| [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
|
42
|
-
| [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP |
|
|
42
|
+
| [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
|
43
43
|
| [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
|
44
44
|
| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
|
45
45
|
|
|
@@ -54,7 +54,7 @@ Pick the vector search database you'll be using and instantiate the client:
|
|
|
54
54
|
client = Langchain::Vectorsearch::Weaviate.new(
|
|
55
55
|
url: ENV["WEAVIATE_URL"],
|
|
56
56
|
api_key: ENV["WEAVIATE_API_KEY"],
|
|
57
|
-
|
|
57
|
+
index_name: "",
|
|
58
58
|
llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
|
59
59
|
)
|
|
60
60
|
|
|
@@ -427,7 +427,7 @@ agent.run(question: "How many users have a name with length greater than 5 in th
|
|
|
427
427
|
| "database" | Useful for querying a SQL database | | `gem "sequel", "~> 5.68.0"` |
|
|
428
428
|
| "ruby_code_interpreter" | Interprets Ruby expressions | | `gem "safe_ruby", "~> 1.0.4"` |
|
|
429
429
|
| "google_search" | A wrapper around Google Search | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key) | `gem "google_search_results", "~> 2.0.0"` |
|
|
430
|
-
| "weather" | Calls Open Weather API to retrieve the current weather | `ENV["OPEN_WEATHER_API_KEY]` (https://home.openweathermap.org/api_keys) | `gem "open-weather-ruby-client", "~> 0.3.0"` |
|
|
430
|
+
| "weather" | Calls Open Weather API to retrieve the current weather | `ENV["OPEN_WEATHER_API_KEY"]` (https://home.openweathermap.org/api_keys) | `gem "open-weather-ruby-client", "~> 0.3.0"` |
|
|
431
431
|
| "wikipedia" | Calls Wikipedia API to retrieve the summary | | `gem "wikipedia-client", "~> 1.17.0"` |
|
|
432
432
|
|
|
433
433
|
#### Loaders 🚚
|
|
data/lib/langchain/chunker/recursive_text.rb
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "baran"
|
|
4
|
+
|
|
5
|
+
module Langchain
|
|
6
|
+
module Chunker
|
|
7
|
+
#
|
|
8
|
+
# Recursive text chunker. Preferentially splits on separators.
|
|
9
|
+
#
|
|
10
|
+
# Usage:
|
|
11
|
+
# Langchain::Chunker::RecursiveText.new(text).chunks
|
|
12
|
+
#
|
|
13
|
+
class RecursiveText < Base
|
|
14
|
+
attr_reader :text, :chunk_size, :chunk_overlap, :separators
|
|
15
|
+
|
|
16
|
+
# @param [String] text
|
|
17
|
+
# @param [Integer] chunk_size
|
|
18
|
+
# @param [Integer] chunk_overlap
|
|
19
|
+
# @param [Array<String>] separators
|
|
20
|
+
def initialize(text, chunk_size: 1000, chunk_overlap: 200, separators: ["\n\n"])
|
|
21
|
+
@text = text
|
|
22
|
+
@chunk_size = chunk_size
|
|
23
|
+
@chunk_overlap = chunk_overlap
|
|
24
|
+
@separators = separators
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# @return [Array<String>]
|
|
28
|
+
def chunks
|
|
29
|
+
splitter = Baran::RecursiveCharacterTextSplitter.new(
|
|
30
|
+
chunk_size: chunk_size,
|
|
31
|
+
chunk_overlap: chunk_overlap,
|
|
32
|
+
separators: separators
|
|
33
|
+
)
|
|
34
|
+
splitter.chunks(text)
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
data/lib/langchain/vectorsearch/base.rb
CHANGED
|
@@ -161,12 +161,16 @@ module Langchain::Vectorsearch
|
|
|
161
161
|
end
|
|
162
162
|
|
|
163
163
|
def add_data(paths:)
|
|
164
|
-
raise ArgumentError, "Paths must be provided" if paths.
|
|
164
|
+
raise ArgumentError, "Paths must be provided" if Array(paths).empty?
|
|
165
165
|
|
|
166
166
|
texts = Array(paths)
|
|
167
167
|
.flatten
|
|
168
|
-
.map
|
|
169
|
-
|
|
168
|
+
.map do |path|
|
|
169
|
+
data = Langchain::Loader.new(path)&.load&.chunks
|
|
170
|
+
data.map { |chunk| chunk[:text] }
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
texts.flatten!
|
|
170
174
|
|
|
171
175
|
add_texts(texts: texts)
|
|
172
176
|
end
|
|
data/lib/langchain/vectorsearch/pgvector.rb
CHANGED
|
@@ -40,20 +40,53 @@ module Langchain::Vectorsearch
|
|
|
40
40
|
super(llm: llm)
|
|
41
41
|
end
|
|
42
42
|
|
|
43
|
-
#
|
|
43
|
+
# Upsert a list of texts to the index
|
|
44
44
|
# @param texts [Array<String>] The texts to add to the index
|
|
45
|
-
# @
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
45
|
+
# @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
|
|
46
|
+
# @return [PG::Result] The response from the database including the ids of
|
|
47
|
+
# the added or updated texts.
|
|
48
|
+
def upsert_texts(texts:, ids:)
|
|
49
|
+
data = texts.zip(ids).flat_map do |(text, id)|
|
|
50
|
+
[id, text, llm.embed(text: text)]
|
|
49
51
|
end
|
|
50
|
-
values = texts.length.times.map { |i| "($#{
|
|
52
|
+
values = texts.length.times.map { |i| "($#{3 * i + 1}, $#{3 * i + 2}, $#{3 * i + 3})" }.join(",")
|
|
53
|
+
# see https://github.com/pgvector/pgvector#storing
|
|
51
54
|
client.exec_params(
|
|
52
|
-
"INSERT INTO #{quoted_table_name} (content, vectors) VALUES
|
|
55
|
+
"INSERT INTO #{quoted_table_name} (id, content, vectors) VALUES
|
|
56
|
+
#{values} ON CONFLICT (id) DO UPDATE SET content = EXCLUDED.content, vectors = EXCLUDED.vectors RETURNING id;",
|
|
53
57
|
data
|
|
54
58
|
)
|
|
55
59
|
end
|
|
56
60
|
|
|
61
|
+
# Add a list of texts to the index
|
|
62
|
+
# @param texts [Array<String>] The texts to add to the index
|
|
63
|
+
# @param ids [Array<String>] The ids to add to the index, in the same order as the texts
|
|
64
|
+
# @return [PG::Result] The response from the database including the ids of
|
|
65
|
+
# the added texts.
|
|
66
|
+
def add_texts(texts:, ids: nil)
|
|
67
|
+
if ids.nil? || ids.empty?
|
|
68
|
+
data = texts.flat_map do |text|
|
|
69
|
+
[text, llm.embed(text: text)]
|
|
70
|
+
end
|
|
71
|
+
values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
|
|
72
|
+
client.exec_params(
|
|
73
|
+
"INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values} RETURNING id;",
|
|
74
|
+
data
|
|
75
|
+
)
|
|
76
|
+
else
|
|
77
|
+
upsert_texts(texts: texts, ids: ids)
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
# Update a list of ids and corresponding texts to the index
|
|
82
|
+
# @param texts [Array<String>] The texts to add to the index
|
|
83
|
+
# @param ids [Array<String>] The ids to add to the index, in the same order as the texts
|
|
84
|
+
# @return [PG::Result] The response from the database including the ids of
|
|
85
|
+
# the updated texts.
|
|
86
|
+
def update_texts(texts:, ids:)
|
|
87
|
+
upsert_texts(texts: texts, ids: ids)
|
|
88
|
+
end
|
|
89
|
+
|
|
57
90
|
# Create default schema
|
|
58
91
|
# @return [PG::Result] The response from the database
|
|
59
92
|
def create_default_schema
|
data/lib/langchain/version.rb
CHANGED
data/lib/langchain.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: langchainrb
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.6.5
|
|
4
|
+
version: 0.6.6
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrei Bondarev
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-07-
|
|
11
|
+
date: 2023-07-14 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: baran
|
|
@@ -507,6 +507,7 @@ files:
|
|
|
507
507
|
- lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml
|
|
508
508
|
- lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml
|
|
509
509
|
- lib/langchain/chunker/base.rb
|
|
510
|
+
- lib/langchain/chunker/recursive_text.rb
|
|
510
511
|
- lib/langchain/chunker/text.rb
|
|
511
512
|
- lib/langchain/contextual_logger.rb
|
|
512
513
|
- lib/langchain/conversation.rb
|