langchainrb 0.6.5 → 0.6.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +1 -1
- data/README.md +3 -3
- data/lib/langchain/chunker/recursive_text.rb +38 -0
- data/lib/langchain/vectorsearch/base.rb +7 -3
- data/lib/langchain/vectorsearch/pgvector.rb +40 -7
- data/lib/langchain/version.rb +1 -1
- data/lib/langchain.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9949f3ffd0338c90274f13b9862b0a6b9ec7b717b14b7ccaa8b6b8e0115f621
|
4
|
+
data.tar.gz: 43ebcb26d51b286278d5098ba50defef0c8bd1a897fa744c4519cfa10bdfdf58
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c95f6e104aaa9a8dab30c9e78e342fdf960ccfef332a2737218f3cc186521369e6f03216d5ccd08329d5110cd15ef10e10a3f460caecc02dd50e32b1b60ff8b3
|
7
|
+
data.tar.gz: c8c059c760b361975ea7ba8eb8a7aa24c1dd7dde5264d7d8bdf20da4f7ec80fe3f1cf4f60dd16dd8028638f3335b1e1632b655ae6c4bdd01912d33371892b5a3
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.6.6] - 2023-07-13
|
4
|
+
- Langchain::Chunker::RecursiveText
|
5
|
+
- Fixes
|
6
|
+
|
7
|
+
## [0.6.5] - 2023-07-06
|
8
|
+
- 🗣️ LLMs
|
9
|
+
- Introducing Llama.cpp support
|
10
|
+
- Langchain::OutputParsers::OutputFixingParser to wrap a Langchain::OutputParser and handle invalid response
|
11
|
+
|
3
12
|
## [0.6.4] - 2023-07-01
|
4
13
|
- Fix `Langchain::Vectorsearch::Qdrant#add_texts()`
|
5
14
|
- Introduce `ConversationMemory`
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -39,7 +39,7 @@ require "langchain"
|
|
39
39
|
| [Hnswlib](https://github.com/nmslib/hnswlib/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
40
40
|
| [Milvus](https://milvus.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | WIP |
|
41
41
|
| [Pinecone](https://www.pinecone.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
42
|
-
| [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP |
|
42
|
+
| [Pgvector](https://github.com/pgvector/pgvector) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
43
43
|
| [Qdrant](https://qdrant.tech/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
44
44
|
| [Weaviate](https://weaviate.io/) | :white_check_mark: | :white_check_mark: | :white_check_mark: | WIP | :white_check_mark: |
|
45
45
|
|
@@ -54,7 +54,7 @@ Pick the vector search database you'll be using and instantiate the client:
|
|
54
54
|
client = Langchain::Vectorsearch::Weaviate.new(
|
55
55
|
url: ENV["WEAVIATE_URL"],
|
56
56
|
api_key: ENV["WEAVIATE_API_KEY"],
|
57
|
-
|
57
|
+
index_name: "",
|
58
58
|
llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
|
59
59
|
)
|
60
60
|
|
@@ -427,7 +427,7 @@ agent.run(question: "How many users have a name with length greater than 5 in th
|
|
427
427
|
| "database" | Useful for querying a SQL database | | `gem "sequel", "~> 5.68.0"` |
|
428
428
|
| "ruby_code_interpreter" | Interprets Ruby expressions | | `gem "safe_ruby", "~> 1.0.4"` |
|
429
429
|
| "google_search" | A wrapper around Google Search | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key) | `gem "google_search_results", "~> 2.0.0"` |
|
430
|
-
| "weather" | Calls Open Weather API to retrieve the current weather | `ENV["OPEN_WEATHER_API_KEY]` (https://home.openweathermap.org/api_keys) | `gem "open-weather-ruby-client", "~> 0.3.0"` |
|
430
|
+
| "weather" | Calls Open Weather API to retrieve the current weather | `ENV["OPEN_WEATHER_API_KEY"]` (https://home.openweathermap.org/api_keys) | `gem "open-weather-ruby-client", "~> 0.3.0"` |
|
431
431
|
| "wikipedia" | Calls Wikipedia API to retrieve the summary | | `gem "wikipedia-client", "~> 1.17.0"` |
|
432
432
|
|
433
433
|
#### Loaders 🚚
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "baran"
|
4
|
+
|
5
|
+
module Langchain
|
6
|
+
module Chunker
|
7
|
+
#
|
8
|
+
# Recursive text chunker. Preferentially splits on separators.
|
9
|
+
#
|
10
|
+
# Usage:
|
11
|
+
# Langchain::Chunker::RecursiveText.new(text).chunks
|
12
|
+
#
|
13
|
+
class RecursiveText < Base
|
14
|
+
attr_reader :text, :chunk_size, :chunk_overlap, :separators
|
15
|
+
|
16
|
+
# @param [String] text
|
17
|
+
# @param [Integer] chunk_size
|
18
|
+
# @param [Integer] chunk_overlap
|
19
|
+
# @param [Array<String>] separators
|
20
|
+
def initialize(text, chunk_size: 1000, chunk_overlap: 200, separators: ["\n\n"])
|
21
|
+
@text = text
|
22
|
+
@chunk_size = chunk_size
|
23
|
+
@chunk_overlap = chunk_overlap
|
24
|
+
@separators = separators
|
25
|
+
end
|
26
|
+
|
27
|
+
# @return [Array<String>]
|
28
|
+
def chunks
|
29
|
+
splitter = Baran::RecursiveCharacterTextSplitter.new(
|
30
|
+
chunk_size: chunk_size,
|
31
|
+
chunk_overlap: chunk_overlap,
|
32
|
+
separators: separators
|
33
|
+
)
|
34
|
+
splitter.chunks(text)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
@@ -161,12 +161,16 @@ module Langchain::Vectorsearch
|
|
161
161
|
end
|
162
162
|
|
163
163
|
def add_data(paths:)
|
164
|
-
raise ArgumentError, "Paths must be provided" if paths.
|
164
|
+
raise ArgumentError, "Paths must be provided" if Array(paths).empty?
|
165
165
|
|
166
166
|
texts = Array(paths)
|
167
167
|
.flatten
|
168
|
-
.map
|
169
|
-
|
168
|
+
.map do |path|
|
169
|
+
data = Langchain::Loader.new(path)&.load&.chunks
|
170
|
+
data.map { |chunk| chunk[:text] }
|
171
|
+
end
|
172
|
+
|
173
|
+
texts.flatten!
|
170
174
|
|
171
175
|
add_texts(texts: texts)
|
172
176
|
end
|
@@ -40,20 +40,53 @@ module Langchain::Vectorsearch
|
|
40
40
|
super(llm: llm)
|
41
41
|
end
|
42
42
|
|
43
|
-
#
|
43
|
+
# Upsert a list of texts to the index
|
44
44
|
# @param texts [Array<String>] The texts to add to the index
|
45
|
-
# @
|
46
|
-
|
47
|
-
|
48
|
-
|
45
|
+
# @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
|
46
|
+
# @return [PG::Result] The response from the database including the ids of
|
47
|
+
# the added or updated texts.
|
48
|
+
def upsert_texts(texts:, ids:)
|
49
|
+
data = texts.zip(ids).flat_map do |(text, id)|
|
50
|
+
[id, text, llm.embed(text: text)]
|
49
51
|
end
|
50
|
-
values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
|
52
|
+
values = texts.length.times.map { |i| "($#{3 * i + 1}, $#{3 * i + 2}, $#{3 * i + 3})" }.join(",")
|
53
|
+
# see https://github.com/pgvector/pgvector#storing
|
51
54
|
client.exec_params(
|
52
|
-
"INSERT INTO #{quoted_table_name} (content, vectors) VALUES
|
55
|
+
"INSERT INTO #{quoted_table_name} (id, content, vectors) VALUES
|
56
|
+
#{values} ON CONFLICT (id) DO UPDATE SET content = EXCLUDED.content, vectors = EXCLUDED.vectors RETURNING id;",
|
53
57
|
data
|
54
58
|
)
|
55
59
|
end
|
56
60
|
|
61
|
+
# Add a list of texts to the index
|
62
|
+
# @param texts [Array<String>] The texts to add to the index
|
63
|
+
# @param ids [Array<Integer>] The ids to add to the index, in the same order as the texts
|
64
|
+
# @return [PG::Result] The response from the database including the ids of
|
65
|
+
# the added texts.
|
66
|
+
def add_texts(texts:, ids: nil)
|
67
|
+
if ids.nil? || ids.empty?
|
68
|
+
data = texts.flat_map do |text|
|
69
|
+
[text, llm.embed(text: text)]
|
70
|
+
end
|
71
|
+
values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
|
72
|
+
client.exec_params(
|
73
|
+
"INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values} RETURNING id;",
|
74
|
+
data
|
75
|
+
)
|
76
|
+
else
|
77
|
+
upsert_texts(texts: texts, ids: ids)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
# Update a list of ids and corresponding texts to the index
|
82
|
+
# @param texts [Array<String>] The texts to add to the index
|
83
|
+
# @param ids [Array<Integer>] The ids to add to the index, in the same order as the texts
|
84
|
+
# @return [PG::Result] The response from the database including the ids of
|
85
|
+
# the updated texts.
|
86
|
+
def update_texts(texts:, ids:)
|
87
|
+
upsert_texts(texts: texts, ids: ids)
|
88
|
+
end
|
89
|
+
|
57
90
|
# Create default schema
|
58
91
|
# @return [PG::Result] The response from the database
|
59
92
|
def create_default_schema
|
data/lib/langchain/version.rb
CHANGED
data/lib/langchain.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.5
|
4
|
+
version: 0.6.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-07-
|
11
|
+
date: 2023-07-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: baran
|
@@ -507,6 +507,7 @@ files:
|
|
507
507
|
- lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.yaml
|
508
508
|
- lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.yaml
|
509
509
|
- lib/langchain/chunker/base.rb
|
510
|
+
- lib/langchain/chunker/recursive_text.rb
|
510
511
|
- lib/langchain/chunker/text.rb
|
511
512
|
- lib/langchain/contextual_logger.rb
|
512
513
|
- lib/langchain/conversation.rb
|