langchainrb 0.3.14 → 0.3.15

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 33c9436ac8d6a73dc06d30f63c11e4f246b3705aa8934765a53ee59325c3a9cd
4
- data.tar.gz: 9cc85603694f9367dd162e25379029a345aa0b5c88cccf303c2af114d43a4010
3
+ metadata.gz: 4f855e3c0e1f0d7b59e0255004a1a806c7048da6d3fe0a8ddf10be68e36ed9ba
4
+ data.tar.gz: 5758c90205c3e2bea420cf7fa0dec07638917beced60bd0482e1d803ced96c07
5
5
  SHA512:
6
- metadata.gz: ca5e81638625939d11999a64d44c92fc57c762a934aa8fd5b110c3f5aacc9a736ab5f02da4366e7a1b9b9ec0335dd1eb1683f5b9d90bd97c81914ea0a698dc7c
7
- data.tar.gz: aff49ef9451bcbc9a97d181757a5b913737cbfbb4fc3ca49d423cbd2e59a4a71091816e98c2996ff7f8292cb6e0c0d69931a4f3e4e36e2a69fdd7f745640e266
6
+ metadata.gz: ec26f8c4257a6949d829d7f68d2175943b80c2837739bdbead8e6b61891a6738cf84ec1934caff777b63dee39f3d0111e8f29a81c37c125be1cd3ae8137b6968
7
+ data.tar.gz: 31bb1aa0296dbbc8e1a1c6c2bc7236c92bf3e627935c752bd527c7f7d769da69f11c6bb63de6e0b4e0d1d8d0c739744045669220c35e253ae83eaa0e23482e6a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.15] - 2023-05-30
4
+ - Drop Ruby 2.7 support. It had reached EOL.
5
+ - Bump pgvector-ruby to 0.2
6
+ - 🚚 Loaders
7
+ - Support for options and block to be passed to CSV processor
8
+
3
9
  ## [0.3.14] - 2023-05-28
4
10
  - 🔍 Vectorsearch
5
11
  - Not relying on Weaviate modules anymore
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.3.14)
4
+ langchainrb (0.3.15)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -171,7 +171,7 @@ GEM
171
171
  ruby-rc4
172
172
  ttfunk
173
173
  pg (1.5.3)
174
- pgvector (0.1.1)
174
+ pgvector (0.2.0)
175
175
  pinecone (0.1.71)
176
176
  dry-struct (~> 1.6.0)
177
177
  dry-validation (~> 1.10.0)
@@ -298,7 +298,7 @@ DEPENDENCIES
298
298
  nokogiri (~> 1.13)
299
299
  pdf-reader (~> 1.4)
300
300
  pg (~> 1.5)
301
- pgvector (< 0.2)
301
+ pgvector (~> 0.2)
302
302
  pinecone (~> 0.1.6)
303
303
  pry-byebug (~> 3.10.0)
304
304
  qdrant-ruby (~> 0.9.0)
@@ -14,14 +14,15 @@ module Langchain
14
14
  # Equivalent to Langchain::Loader.new(path).load
15
15
  # @param path [String | Pathname] path to file or url
16
16
  # @return [Langchain::Data] loaded content wrapped together with its source
17
- def self.load(path)
18
- new(path).load
17
+ def self.load(path, options = {}, &block)
18
+ new(path, options).load(&block)
19
19
  end
20
20
 
21
21
  # Initialize Langchain::Loader
22
22
  # @param path [String | Pathname] path to file or url
23
23
  # @return [Langchain::Loader] loader instance
24
- def initialize(path)
24
+ def initialize(path, options = {})
25
+ @options = options
25
26
  @path = path
26
27
  end
27
28
 
@@ -35,39 +36,38 @@ module Langchain
35
36
 
36
37
  # Load data from a file or url
37
38
  # @return [Langchain::Data] loaded content wrapped together with its source
38
- def load
39
- url? ? from_url(@path) : from_path(@path)
39
+ def load(&block)
40
+ @raw_data = url? ? load_from_url : load_from_path
41
+
42
+ data = if block
43
+ yield @raw_data.read, @options
44
+ else
45
+ processor_klass.new(@options).parse(@raw_data)
46
+ end
47
+
48
+ Langchain::Data.new(data, source: @path)
40
49
  end
41
50
 
42
51
  private
43
52
 
44
- def from_url(url)
45
- process do
46
- data = URI.parse(url).open
47
- processor = find_processor(:CONTENT_TYPES, data.content_type)
48
- [data, processor]
49
- end
53
+ def load_from_url
54
+ URI.parse(@path).open
50
55
  end
51
56
 
52
- def from_path(path)
53
- raise FileNotFound unless File.exist?(path)
57
+ def load_from_path
58
+ raise FileNotFound unless File.exist?(@path)
54
59
 
55
- process do
56
- [File.open(path), find_processor(:EXTENSIONS, File.extname(path))]
57
- end
60
+ File.open(@path)
58
61
  end
59
62
 
60
- def process(&block)
61
- raw_data, kind = yield
63
+ def processor_klass
64
+ raise UnknownFormatError unless (kind = find_processor)
62
65
 
63
- raise UnknownFormatError unless kind
64
-
65
- processor = Langchain::Processors.const_get(kind).new
66
- Langchain::Data.new(processor.parse(raw_data), source: @path)
66
+ Langchain::Processors.const_get(kind)
67
67
  end
68
68
 
69
- def find_processor(constant, value)
70
- processors.find { |klass| processor_matches? "#{klass}::#{constant}", value }
69
+ def find_processor
70
+ processors.find { |klass| processor_matches? "#{klass}::#{lookup_constant}", source_type }
71
71
  end
72
72
 
73
73
  def processor_matches?(constant, value)
@@ -77,5 +77,13 @@ module Langchain
77
77
  def processors
78
78
  Langchain::Processors.constants
79
79
  end
80
+
81
+ def source_type
82
+ url? ? @raw_data.content_type : File.extname(@path)
83
+ end
84
+
85
+ def lookup_constant
86
+ url? ? :CONTENT_TYPES : :EXTENSIONS
87
+ end
80
88
  end
81
89
  end
@@ -6,6 +6,10 @@ module Langchain
6
6
  EXTENSIONS = []
7
7
  CONTENT_TYPES = []
8
8
 
9
+ def initialize(options = {})
10
+ @options = options
11
+ end
12
+
9
13
  def parse(data)
10
14
  raise NotImplementedError
11
15
  end
@@ -12,10 +12,16 @@ module Langchain
12
12
  # @param [File] data
13
13
  # @return [Array<Array<String>>] rows, each an array of stripped field values
14
14
  def parse(data)
15
- ::CSV.new(data.read).map do |row|
15
+ ::CSV.new(data.read, col_sep: separator).map do |row|
16
16
  row.map(&:strip)
17
17
  end
18
18
  end
19
+
20
+ private
21
+
22
+ def separator
23
+ @options[:col_sep] || ","
24
+ end
19
25
  end
20
26
  end
21
27
  end
@@ -6,7 +6,7 @@ module Langchain
6
6
  EXTENSIONS = [".docx"]
7
7
  CONTENT_TYPES = ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"]
8
8
 
9
- def initialize
9
+ def initialize(*)
10
10
  depends_on "docx"
11
11
  require "docx"
12
12
  end
@@ -9,7 +9,7 @@ module Langchain
9
9
  # We only look for headings and paragraphs
10
10
  TEXT_CONTENT_TAGS = %w[h1 h2 h3 h4 h5 h6 p]
11
11
 
12
- def initialize
12
+ def initialize(*)
13
13
  depends_on "nokogiri"
14
14
  require "nokogiri"
15
15
  end
@@ -6,7 +6,7 @@ module Langchain
6
6
  EXTENSIONS = [".pdf"]
7
7
  CONTENT_TYPES = ["application/pdf"]
8
8
 
9
- def initialize
9
+ def initialize(*)
10
10
  depends_on "pdf-reader"
11
11
  require "pdf-reader"
12
12
  end
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.14"
4
+ VERSION = "0.3.15"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.14
4
+ version: 0.3.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-28 00:00:00.000000000 Z
11
+ date: 2023-05-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dotenv-rails
@@ -182,14 +182,14 @@ dependencies:
182
182
  name: pgvector
183
183
  requirement: !ruby/object:Gem::Requirement
184
184
  requirements:
185
- - - "<"
185
+ - - "~>"
186
186
  - !ruby/object:Gem::Version
187
187
  version: '0.2'
188
188
  type: :development
189
189
  prerelease: false
190
190
  version_requirements: !ruby/object:Gem::Requirement
191
191
  requirements:
192
- - - "<"
192
+ - - "~>"
193
193
  - !ruby/object:Gem::Version
194
194
  version: '0.2'
195
195
  - !ruby/object:Gem::Dependency
@@ -290,7 +290,7 @@ dependencies:
290
290
  - - "~>"
291
291
  - !ruby/object:Gem::Version
292
292
  version: 1.17.0
293
- description: Build ML/AI-powered applications with Ruby's LangChain
293
+ description: Build LLM-backed Ruby applications with Ruby's LangChain
294
294
  email:
295
295
  - andrei.bondarev13@gmail.com
296
296
  executables: []
@@ -366,15 +366,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
366
366
  requirements:
367
367
  - - ">="
368
368
  - !ruby/object:Gem::Version
369
- version: 2.6.0
369
+ version: 3.0.0
370
370
  required_rubygems_version: !ruby/object:Gem::Requirement
371
371
  requirements:
372
372
  - - ">="
373
373
  - !ruby/object:Gem::Version
374
374
  version: '0'
375
375
  requirements: []
376
- rubygems_version: 3.2.3
376
+ rubygems_version: 3.3.7
377
377
  signing_key:
378
378
  specification_version: 4
379
- summary: Build ML/AI-powered applications with Ruby's LangChain
379
+ summary: Build LLM-backed Ruby applications with Ruby's LangChain
380
380
  test_files: []