langchainrb 0.3.14 → 0.3.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/Gemfile.lock +3 -3
- data/lib/langchain/loader.rb +32 -24
- data/lib/langchain/processors/base.rb +4 -0
- data/lib/langchain/processors/csv.rb +7 -1
- data/lib/langchain/processors/docx.rb +1 -1
- data/lib/langchain/processors/html.rb +1 -1
- data/lib/langchain/processors/pdf.rb +1 -1
- data/lib/version.rb +1 -1
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4f855e3c0e1f0d7b59e0255004a1a806c7048da6d3fe0a8ddf10be68e36ed9ba
|
4
|
+
data.tar.gz: 5758c90205c3e2bea420cf7fa0dec07638917beced60bd0482e1d803ced96c07
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ec26f8c4257a6949d829d7f68d2175943b80c2837739bdbead8e6b61891a6738cf84ec1934caff777b63dee39f3d0111e8f29a81c37c125be1cd3ae8137b6968
|
7
|
+
data.tar.gz: 31bb1aa0296dbbc8e1a1c6c2bc7236c92bf3e627935c752bd527c7f7d769da69f11c6bb63de6e0b4e0d1d8d0c739744045669220c35e253ae83eaa0e23482e6a
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [0.3.15] - 2023-05-30
|
4
|
+
- Drop Ruby 2.7 support. It had reached EOL.
|
5
|
+
- Bump pgvector-ruby to 0.2
|
6
|
+
- 🚚 Loaders
|
7
|
+
- Support for options and block to be passed to CSV processor
|
8
|
+
|
3
9
|
## [0.3.14] - 2023-05-28
|
4
10
|
- 🔍 Vectorsearch
|
5
11
|
- Not relying on Weaviate modules anymore
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
langchainrb (0.3.14)
|
4
|
+
langchainrb (0.3.15)
|
5
5
|
|
6
6
|
GEM
|
7
7
|
remote: https://rubygems.org/
|
@@ -171,7 +171,7 @@ GEM
|
|
171
171
|
ruby-rc4
|
172
172
|
ttfunk
|
173
173
|
pg (1.5.3)
|
174
|
-
pgvector (0.
|
174
|
+
pgvector (0.2.0)
|
175
175
|
pinecone (0.1.71)
|
176
176
|
dry-struct (~> 1.6.0)
|
177
177
|
dry-validation (~> 1.10.0)
|
@@ -298,7 +298,7 @@ DEPENDENCIES
|
|
298
298
|
nokogiri (~> 1.13)
|
299
299
|
pdf-reader (~> 1.4)
|
300
300
|
pg (~> 1.5)
|
301
|
-
pgvector (
|
301
|
+
pgvector (~> 0.2)
|
302
302
|
pinecone (~> 0.1.6)
|
303
303
|
pry-byebug (~> 3.10.0)
|
304
304
|
qdrant-ruby (~> 0.9.0)
|
data/lib/langchain/loader.rb
CHANGED
@@ -14,14 +14,15 @@ module Langchain
|
|
14
14
|
# Equivalent to Langchain::Loader.new(path).load
|
15
15
|
# @param path [String | Pathname] path to file or url
|
16
16
|
# @return [String] file content
|
17
|
-
def self.load(path)
|
18
|
-
new(path).load
|
17
|
+
def self.load(path, options = {}, &block)
|
18
|
+
new(path, options).load(&block)
|
19
19
|
end
|
20
20
|
|
21
21
|
# Initialize Langchain::Loader
|
22
22
|
# @param path [String | Pathname] path to file or url
|
23
23
|
# @return [Langchain::Loader] loader instance
|
24
|
-
def initialize(path)
|
24
|
+
def initialize(path, options = {})
|
25
|
+
@options = options
|
25
26
|
@path = path
|
26
27
|
end
|
27
28
|
|
@@ -35,39 +36,38 @@ module Langchain
|
|
35
36
|
|
36
37
|
# Load data from a file or url
|
37
38
|
# @return [String] file content
|
38
|
-
def load
|
39
|
-
url? ?
|
39
|
+
def load(&block)
|
40
|
+
@raw_data = url? ? load_from_url : load_from_path
|
41
|
+
|
42
|
+
data = if block
|
43
|
+
yield @raw_data.read, @options
|
44
|
+
else
|
45
|
+
processor_klass.new(@options).parse(@raw_data)
|
46
|
+
end
|
47
|
+
|
48
|
+
Langchain::Data.new(data, source: @path)
|
40
49
|
end
|
41
50
|
|
42
51
|
private
|
43
52
|
|
44
|
-
def
|
45
|
-
|
46
|
-
data = URI.parse(url).open
|
47
|
-
processor = find_processor(:CONTENT_TYPES, data.content_type)
|
48
|
-
[data, processor]
|
49
|
-
end
|
53
|
+
def load_from_url
|
54
|
+
URI.parse(@path).open
|
50
55
|
end
|
51
56
|
|
52
|
-
def
|
53
|
-
raise FileNotFound unless File.exist?(path)
|
57
|
+
def load_from_path
|
58
|
+
raise FileNotFound unless File.exist?(@path)
|
54
59
|
|
55
|
-
|
56
|
-
[File.open(path), find_processor(:EXTENSIONS, File.extname(path))]
|
57
|
-
end
|
60
|
+
File.open(@path)
|
58
61
|
end
|
59
62
|
|
60
|
-
def
|
61
|
-
|
63
|
+
def processor_klass
|
64
|
+
raise UnknownFormatError unless (kind = find_processor)
|
62
65
|
|
63
|
-
|
64
|
-
|
65
|
-
processor = Langchain::Processors.const_get(kind).new
|
66
|
-
Langchain::Data.new(processor.parse(raw_data), source: @path)
|
66
|
+
Langchain::Processors.const_get(kind)
|
67
67
|
end
|
68
68
|
|
69
|
-
def find_processor
|
70
|
-
processors.find { |klass| processor_matches? "#{klass}::#{
|
69
|
+
def find_processor
|
70
|
+
processors.find { |klass| processor_matches? "#{klass}::#{lookup_constant}", source_type }
|
71
71
|
end
|
72
72
|
|
73
73
|
def processor_matches?(constant, value)
|
@@ -77,5 +77,13 @@ module Langchain
|
|
77
77
|
def processors
|
78
78
|
Langchain::Processors.constants
|
79
79
|
end
|
80
|
+
|
81
|
+
def source_type
|
82
|
+
url? ? @raw_data.content_type : File.extname(@path)
|
83
|
+
end
|
84
|
+
|
85
|
+
def lookup_constant
|
86
|
+
url? ? :CONTENT_TYPES : :EXTENSIONS
|
87
|
+
end
|
80
88
|
end
|
81
89
|
end
|
@@ -12,10 +12,16 @@ module Langchain
|
|
12
12
|
# @param [File] data
|
13
13
|
# @return [Array of Hash]
|
14
14
|
def parse(data)
|
15
|
-
::CSV.new(data.read).map do |row|
|
15
|
+
::CSV.new(data.read, col_sep: separator).map do |row|
|
16
16
|
row.map(&:strip)
|
17
17
|
end
|
18
18
|
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def separator
|
23
|
+
@options[:col_sep] || ","
|
24
|
+
end
|
19
25
|
end
|
20
26
|
end
|
21
27
|
end
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.14
|
4
|
+
version: 0.3.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-05-
|
11
|
+
date: 2023-05-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: dotenv-rails
|
@@ -182,14 +182,14 @@ dependencies:
|
|
182
182
|
name: pgvector
|
183
183
|
requirement: !ruby/object:Gem::Requirement
|
184
184
|
requirements:
|
185
|
-
- - "
|
185
|
+
- - "~>"
|
186
186
|
- !ruby/object:Gem::Version
|
187
187
|
version: '0.2'
|
188
188
|
type: :development
|
189
189
|
prerelease: false
|
190
190
|
version_requirements: !ruby/object:Gem::Requirement
|
191
191
|
requirements:
|
192
|
-
- - "
|
192
|
+
- - "~>"
|
193
193
|
- !ruby/object:Gem::Version
|
194
194
|
version: '0.2'
|
195
195
|
- !ruby/object:Gem::Dependency
|
@@ -290,7 +290,7 @@ dependencies:
|
|
290
290
|
- - "~>"
|
291
291
|
- !ruby/object:Gem::Version
|
292
292
|
version: 1.17.0
|
293
|
-
description: Build
|
293
|
+
description: Build LLM-backed Ruby applications with Ruby's LangChain
|
294
294
|
email:
|
295
295
|
- andrei.bondarev13@gmail.com
|
296
296
|
executables: []
|
@@ -366,15 +366,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
366
366
|
requirements:
|
367
367
|
- - ">="
|
368
368
|
- !ruby/object:Gem::Version
|
369
|
-
version:
|
369
|
+
version: 3.0.0
|
370
370
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
371
371
|
requirements:
|
372
372
|
- - ">="
|
373
373
|
- !ruby/object:Gem::Version
|
374
374
|
version: '0'
|
375
375
|
requirements: []
|
376
|
-
rubygems_version: 3.
|
376
|
+
rubygems_version: 3.3.7
|
377
377
|
signing_key:
|
378
378
|
specification_version: 4
|
379
|
-
summary: Build
|
379
|
+
summary: Build LLM-backed Ruby applications with Ruby's LangChain
|
380
380
|
test_files: []
|