langchainrb 0.3.14 → 0.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 33c9436ac8d6a73dc06d30f63c11e4f246b3705aa8934765a53ee59325c3a9cd
4
- data.tar.gz: 9cc85603694f9367dd162e25379029a345aa0b5c88cccf303c2af114d43a4010
3
+ metadata.gz: 4f855e3c0e1f0d7b59e0255004a1a806c7048da6d3fe0a8ddf10be68e36ed9ba
4
+ data.tar.gz: 5758c90205c3e2bea420cf7fa0dec07638917beced60bd0482e1d803ced96c07
5
5
  SHA512:
6
- metadata.gz: ca5e81638625939d11999a64d44c92fc57c762a934aa8fd5b110c3f5aacc9a736ab5f02da4366e7a1b9b9ec0335dd1eb1683f5b9d90bd97c81914ea0a698dc7c
7
- data.tar.gz: aff49ef9451bcbc9a97d181757a5b913737cbfbb4fc3ca49d423cbd2e59a4a71091816e98c2996ff7f8292cb6e0c0d69931a4f3e4e36e2a69fdd7f745640e266
6
+ metadata.gz: ec26f8c4257a6949d829d7f68d2175943b80c2837739bdbead8e6b61891a6738cf84ec1934caff777b63dee39f3d0111e8f29a81c37c125be1cd3ae8137b6968
7
+ data.tar.gz: 31bb1aa0296dbbc8e1a1c6c2bc7236c92bf3e627935c752bd527c7f7d769da69f11c6bb63de6e0b4e0d1d8d0c739744045669220c35e253ae83eaa0e23482e6a
data/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.15] - 2023-05-30
4
+ - Drop Ruby 2.7 support. It had reached EOL.
5
+ - Bump pgvector-ruby to 0.2
6
+ - 🚚 Loaders
7
+ - Support for options and block to be passed to CSV processor
8
+
3
9
  ## [0.3.14] - 2023-05-28
4
10
  - 🔍 Vectorsearch
5
11
  - Not relying on Weaviate modules anymore
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- langchainrb (0.3.14)
4
+ langchainrb (0.3.15)
5
5
 
6
6
  GEM
7
7
  remote: https://rubygems.org/
@@ -171,7 +171,7 @@ GEM
171
171
  ruby-rc4
172
172
  ttfunk
173
173
  pg (1.5.3)
174
- pgvector (0.1.1)
174
+ pgvector (0.2.0)
175
175
  pinecone (0.1.71)
176
176
  dry-struct (~> 1.6.0)
177
177
  dry-validation (~> 1.10.0)
@@ -298,7 +298,7 @@ DEPENDENCIES
298
298
  nokogiri (~> 1.13)
299
299
  pdf-reader (~> 1.4)
300
300
  pg (~> 1.5)
301
- pgvector (< 0.2)
301
+ pgvector (~> 0.2)
302
302
  pinecone (~> 0.1.6)
303
303
  pry-byebug (~> 3.10.0)
304
304
  qdrant-ruby (~> 0.9.0)
@@ -14,14 +14,15 @@ module Langchain
14
14
  # Equivalent to Langchain::Loader.new(path).load
15
15
  # @param path [String | Pathname] path to file or url
16
16
  # @return [Langchain::Data] loaded content together with its source
17
- def self.load(path)
18
- new(path).load
17
+ def self.load(path, options = {}, &block)
18
+ new(path, options).load(&block)
19
19
  end
20
20
 
21
21
  # Initialize Langchain::Loader
22
22
  # @param path [String | Pathname] path to file or url
23
23
  # @return [Langchain::Loader] loader instance
24
- def initialize(path)
24
+ def initialize(path, options = {})
25
+ @options = options
25
26
  @path = path
26
27
  end
27
28
 
@@ -35,39 +36,38 @@ module Langchain
35
36
 
36
37
  # Load data from a file or url
37
38
  # @return [Langchain::Data] loaded content together with its source
38
- def load
39
- url? ? from_url(@path) : from_path(@path)
39
+ def load(&block)
40
+ @raw_data = url? ? load_from_url : load_from_path
41
+
42
+ data = if block
43
+ yield @raw_data.read, @options
44
+ else
45
+ processor_klass.new(@options).parse(@raw_data)
46
+ end
47
+
48
+ Langchain::Data.new(data, source: @path)
40
49
  end
41
50
 
42
51
  private
43
52
 
44
- def from_url(url)
45
- process do
46
- data = URI.parse(url).open
47
- processor = find_processor(:CONTENT_TYPES, data.content_type)
48
- [data, processor]
49
- end
53
+ def load_from_url
54
+ URI.parse(@path).open
50
55
  end
51
56
 
52
- def from_path(path)
53
- raise FileNotFound unless File.exist?(path)
57
+ def load_from_path
58
+ raise FileNotFound unless File.exist?(@path)
54
59
 
55
- process do
56
- [File.open(path), find_processor(:EXTENSIONS, File.extname(path))]
57
- end
60
+ File.open(@path)
58
61
  end
59
62
 
60
- def process(&block)
61
- raw_data, kind = yield
63
+ def processor_klass
64
+ raise UnknownFormatError unless (kind = find_processor)
62
65
 
63
- raise UnknownFormatError unless kind
64
-
65
- processor = Langchain::Processors.const_get(kind).new
66
- Langchain::Data.new(processor.parse(raw_data), source: @path)
66
+ Langchain::Processors.const_get(kind)
67
67
  end
68
68
 
69
- def find_processor(constant, value)
70
- processors.find { |klass| processor_matches? "#{klass}::#{constant}", value }
69
+ def find_processor
70
+ processors.find { |klass| processor_matches? "#{klass}::#{lookup_constant}", source_type }
71
71
  end
72
72
 
73
73
  def processor_matches?(constant, value)
@@ -77,5 +77,13 @@ module Langchain
77
77
  def processors
78
78
  Langchain::Processors.constants
79
79
  end
80
+
81
+ def source_type
82
+ url? ? @raw_data.content_type : File.extname(@path)
83
+ end
84
+
85
+ def lookup_constant
86
+ url? ? :CONTENT_TYPES : :EXTENSIONS
87
+ end
80
88
  end
81
89
  end
@@ -6,6 +6,10 @@ module Langchain
6
6
  EXTENSIONS = []
7
7
  CONTENT_TYPES = []
8
8
 
9
+ def initialize(options = {})
10
+ @options = options
11
+ end
12
+
9
13
  def parse(data)
10
14
  raise NotImplementedError
11
15
  end
@@ -12,10 +12,16 @@ module Langchain
12
12
  # @param [File] data
13
13
  # @return [Array<Array<String>>] rows of stripped cell values
14
14
  def parse(data)
15
- ::CSV.new(data.read).map do |row|
15
+ ::CSV.new(data.read, col_sep: separator).map do |row|
16
16
  row.map(&:strip)
17
17
  end
18
18
  end
19
+
20
+ private
21
+
22
+ def separator
23
+ @options[:col_sep] || ","
24
+ end
19
25
  end
20
26
  end
21
27
  end
@@ -6,7 +6,7 @@ module Langchain
6
6
  EXTENSIONS = [".docx"]
7
7
  CONTENT_TYPES = ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"]
8
8
 
9
- def initialize
9
+ def initialize(*)
10
10
  depends_on "docx"
11
11
  require "docx"
12
12
  end
@@ -9,7 +9,7 @@ module Langchain
9
9
  # We only look for headings and paragraphs
10
10
  TEXT_CONTENT_TAGS = %w[h1 h2 h3 h4 h5 h6 p]
11
11
 
12
- def initialize
12
+ def initialize(*)
13
13
  depends_on "nokogiri"
14
14
  require "nokogiri"
15
15
  end
@@ -6,7 +6,7 @@ module Langchain
6
6
  EXTENSIONS = [".pdf"]
7
7
  CONTENT_TYPES = ["application/pdf"]
8
8
 
9
- def initialize
9
+ def initialize(*)
10
10
  depends_on "pdf-reader"
11
11
  require "pdf-reader"
12
12
  end
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.14"
4
+ VERSION = "0.3.15"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.14
4
+ version: 0.3.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-05-28 00:00:00.000000000 Z
11
+ date: 2023-05-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: dotenv-rails
@@ -182,14 +182,14 @@ dependencies:
182
182
  name: pgvector
183
183
  requirement: !ruby/object:Gem::Requirement
184
184
  requirements:
185
- - - "<"
185
+ - - "~>"
186
186
  - !ruby/object:Gem::Version
187
187
  version: '0.2'
188
188
  type: :development
189
189
  prerelease: false
190
190
  version_requirements: !ruby/object:Gem::Requirement
191
191
  requirements:
192
- - - "<"
192
+ - - "~>"
193
193
  - !ruby/object:Gem::Version
194
194
  version: '0.2'
195
195
  - !ruby/object:Gem::Dependency
@@ -290,7 +290,7 @@ dependencies:
290
290
  - - "~>"
291
291
  - !ruby/object:Gem::Version
292
292
  version: 1.17.0
293
- description: Build ML/AI-powered applications with Ruby's LangChain
293
+ description: Build LLM-backed Ruby applications with Ruby's LangChain
294
294
  email:
295
295
  - andrei.bondarev13@gmail.com
296
296
  executables: []
@@ -366,15 +366,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
366
366
  requirements:
367
367
  - - ">="
368
368
  - !ruby/object:Gem::Version
369
- version: 2.6.0
369
+ version: 3.0.0
370
370
  required_rubygems_version: !ruby/object:Gem::Requirement
371
371
  requirements:
372
372
  - - ">="
373
373
  - !ruby/object:Gem::Version
374
374
  version: '0'
375
375
  requirements: []
376
- rubygems_version: 3.2.3
376
+ rubygems_version: 3.3.7
377
377
  signing_key:
378
378
  specification_version: 4
379
- summary: Build ML/AI-powered applications with Ruby's LangChain
379
+ summary: Build LLM-backed Ruby applications with Ruby's LangChain
380
380
  test_files: []