langchainrb 0.3.14 → 0.3.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Gemfile.lock +3 -3
- data/lib/langchain/loader.rb +32 -24
- data/lib/langchain/processors/base.rb +4 -0
- data/lib/langchain/processors/csv.rb +7 -1
- data/lib/langchain/processors/docx.rb +1 -1
- data/lib/langchain/processors/html.rb +1 -1
- data/lib/langchain/processors/pdf.rb +1 -1
- data/lib/version.rb +1 -1
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4f855e3c0e1f0d7b59e0255004a1a806c7048da6d3fe0a8ddf10be68e36ed9ba
|
4
|
+
data.tar.gz: 5758c90205c3e2bea420cf7fa0dec07638917beced60bd0482e1d803ced96c07
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ec26f8c4257a6949d829d7f68d2175943b80c2837739bdbead8e6b61891a6738cf84ec1934caff777b63dee39f3d0111e8f29a81c37c125be1cd3ae8137b6968
|
7
|
+
data.tar.gz: 31bb1aa0296dbbc8e1a1c6c2bc7236c92bf3e627935c752bd527c7f7d769da69f11c6bb63de6e0b4e0d1d8d0c739744045669220c35e253ae83eaa0e23482e6a
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.3.15] - 2023-05-30
|
4
|
+
- Drop Ruby 2.7 support. It had reached EOL.
|
5
|
+
- Bump pgvector-ruby to 0.2
|
6
|
+
- 🚚 Loaders
|
7
|
+
- Support for options and block to be passed to CSV processor
|
8
|
+
|
3
9
|
## [0.3.14] - 2023-05-28
|
4
10
|
- 🔍 Vectorsearch
|
5
11
|
- Not relying on Weaviate modules anymore
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
langchainrb (0.3.14)
|
4
|
+
langchainrb (0.3.15)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
@@ -171,7 +171,7 @@ GEM
|
|
171
171
|
ruby-rc4
|
172
172
|
ttfunk
|
173
173
|
pg (1.5.3)
|
174
|
-
pgvector (0.
|
174
|
+
pgvector (0.2.0)
|
175
175
|
pinecone (0.1.71)
|
176
176
|
dry-struct (~> 1.6.0)
|
177
177
|
dry-validation (~> 1.10.0)
|
@@ -298,7 +298,7 @@ DEPENDENCIES
|
|
298
298
|
nokogiri (~> 1.13)
|
299
299
|
pdf-reader (~> 1.4)
|
300
300
|
pg (~> 1.5)
|
301
|
-
pgvector (
|
301
|
+
pgvector (~> 0.2)
|
302
302
|
pinecone (~> 0.1.6)
|
303
303
|
pry-byebug (~> 3.10.0)
|
304
304
|
qdrant-ruby (~> 0.9.0)
|
data/lib/langchain/loader.rb
CHANGED
@@ -14,14 +14,15 @@ module Langchain
|
|
14
14
|
# Equivalent to Langchain::Loader.new(path).load
|
15
15
|
# @param path [String | Pathname] path to file or url
|
16
16
|
# @return [String] file content
|
17
|
-
def self.load(path)
|
18
|
-
new(path).load
|
17
|
+
def self.load(path, options = {}, &block)
|
18
|
+
new(path, options).load(&block)
|
19
19
|
end
|
20
20
|
|
21
21
|
# Initialize Langchain::Loader
|
22
22
|
# @param path [String | Pathname] path to file or url
|
23
23
|
# @return [Langchain::Loader] loader instance
|
24
|
-
def initialize(path)
|
24
|
+
def initialize(path, options = {})
|
25
|
+
@options = options
|
25
26
|
@path = path
|
26
27
|
end
|
27
28
|
|
@@ -35,39 +36,38 @@ module Langchain
|
|
35
36
|
|
36
37
|
# Load data from a file or url
|
37
38
|
# @return [String] file content
|
38
|
-
def load
|
39
|
-
url? ?
|
39
|
+
def load(&block)
|
40
|
+
@raw_data = url? ? load_from_url : load_from_path
|
41
|
+
|
42
|
+
data = if block
|
43
|
+
yield @raw_data.read, @options
|
44
|
+
else
|
45
|
+
processor_klass.new(@options).parse(@raw_data)
|
46
|
+
end
|
47
|
+
|
48
|
+
Langchain::Data.new(data, source: @path)
|
40
49
|
end
|
41
50
|
|
42
51
|
private
|
43
52
|
|
44
|
-
def
|
45
|
-
|
46
|
-
data = URI.parse(url).open
|
47
|
-
processor = find_processor(:CONTENT_TYPES, data.content_type)
|
48
|
-
[data, processor]
|
49
|
-
end
|
53
|
+
def load_from_url
|
54
|
+
URI.parse(@path).open
|
50
55
|
end
|
51
56
|
|
52
|
-
def
|
53
|
-
raise FileNotFound unless File.exist?(path)
|
57
|
+
def load_from_path
|
58
|
+
raise FileNotFound unless File.exist?(@path)
|
54
59
|
|
55
|
-
|
56
|
-
[File.open(path), find_processor(:EXTENSIONS, File.extname(path))]
|
57
|
-
end
|
60
|
+
File.open(@path)
|
58
61
|
end
|
59
62
|
|
60
|
-
def
|
61
|
-
|
63
|
+
def processor_klass
|
64
|
+
raise UnknownFormatError unless (kind = find_processor)
|
62
65
|
|
63
|
-
|
64
|
-
|
65
|
-
processor = Langchain::Processors.const_get(kind).new
|
66
|
-
Langchain::Data.new(processor.parse(raw_data), source: @path)
|
66
|
+
Langchain::Processors.const_get(kind)
|
67
67
|
end
|
68
68
|
|
69
|
-
def find_processor
|
70
|
-
processors.find { |klass| processor_matches? "#{klass}::#{
|
69
|
+
def find_processor
|
70
|
+
processors.find { |klass| processor_matches? "#{klass}::#{lookup_constant}", source_type }
|
71
71
|
end
|
72
72
|
|
73
73
|
def processor_matches?(constant, value)
|
@@ -77,5 +77,13 @@ module Langchain
|
|
77
77
|
def processors
|
78
78
|
Langchain::Processors.constants
|
79
79
|
end
|
80
|
+
|
81
|
+
def source_type
|
82
|
+
url? ? @raw_data.content_type : File.extname(@path)
|
83
|
+
end
|
84
|
+
|
85
|
+
def lookup_constant
|
86
|
+
url? ? :CONTENT_TYPES : :EXTENSIONS
|
87
|
+
end
|
80
88
|
end
|
81
89
|
end
|
data/lib/langchain/processors/csv.rb
CHANGED
@@ -12,10 +12,16 @@ module Langchain
|
|
12
12
|
# @param [File] data
|
13
13
|
# @return [Array of Hash]
|
14
14
|
def parse(data)
|
15
|
-
::CSV.new(data.read).map do |row|
|
15
|
+
::CSV.new(data.read, col_sep: separator).map do |row|
|
16
16
|
row.map(&:strip)
|
17
17
|
end
|
18
18
|
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def separator
|
23
|
+
@options[:col_sep] || ","
|
24
|
+
end
|
19
25
|
end
|
20
26
|
end
|
21
27
|
end
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.14
|
4
|
+
version: 0.3.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-05-28 00:00:00.000000000 Z
|
11
|
+
date: 2023-05-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: dotenv-rails
|
@@ -182,14 +182,14 @@ dependencies:
|
|
182
182
|
name: pgvector
|
183
183
|
requirement: !ruby/object:Gem::Requirement
|
184
184
|
requirements:
|
185
|
-
- - "
|
185
|
+
- - "~>"
|
186
186
|
- !ruby/object:Gem::Version
|
187
187
|
version: '0.2'
|
188
188
|
type: :development
|
189
189
|
prerelease: false
|
190
190
|
version_requirements: !ruby/object:Gem::Requirement
|
191
191
|
requirements:
|
192
|
-
- - "
|
192
|
+
- - "~>"
|
193
193
|
- !ruby/object:Gem::Version
|
194
194
|
version: '0.2'
|
195
195
|
- !ruby/object:Gem::Dependency
|
@@ -290,7 +290,7 @@ dependencies:
|
|
290
290
|
- - "~>"
|
291
291
|
- !ruby/object:Gem::Version
|
292
292
|
version: 1.17.0
|
293
|
-
description: Build
|
293
|
+
description: Build LLM-backed Ruby applications with Ruby's LangChain
|
294
294
|
email:
|
295
295
|
- andrei.bondarev13@gmail.com
|
296
296
|
executables: []
|
@@ -366,15 +366,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
366
366
|
requirements:
|
367
367
|
- - ">="
|
368
368
|
- !ruby/object:Gem::Version
|
369
|
-
version:
|
369
|
+
version: 3.0.0
|
370
370
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
371
371
|
requirements:
|
372
372
|
- - ">="
|
373
373
|
- !ruby/object:Gem::Version
|
374
374
|
version: '0'
|
375
375
|
requirements: []
|
376
|
-
rubygems_version: 3.
|
376
|
+
rubygems_version: 3.3.7
|
377
377
|
signing_key:
|
378
378
|
specification_version: 4
|
379
|
-
summary: Build
|
379
|
+
summary: Build LLM-backed Ruby applications with Ruby's LangChain
|
380
380
|
test_files: []
|