langchainrb 0.3.8 → 0.3.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6b208f5fc51ce342bd7ffcfb776487452a40fb0505e4fa6a6b371e0db1d2a278
4
- data.tar.gz: 8551edf0406827f92026c8fde54b3b27f32727dec6381f5a33cd58c9c39d40a5
3
+ metadata.gz: fa4976937d6ccf9b2c0e1f0e0d98f53a98f21f0d9d1abbe85d6cc83b646cb8d7
4
+ data.tar.gz: e563cf1aaece650c2cbe2727b06b15c2e284276c78f1e820070d566a6d3b7386
5
5
  SHA512:
6
- metadata.gz: 0d0d10e84dd47b768979e4f004e9026aac48c45ed5e15ffe499dc0fc9679e806408cc5688cdbd06931e7f63e8840dbb33b5ad7f58ca311eb05a4528757fc9581
7
- data.tar.gz: 8723656cefc802cdd4464d24f452a858a1315e654d64d1c256cab9e1de5297c1de0950a4a625278fe33aa8f149db698878bfe608cd06051bc0f8eb8c5abb22f3
6
+ metadata.gz: 70e086ce4951b95f1f232b436354f2566caedb0d200b55e3f7ed3bd9589043b06053e78198f97ed47335825edae528e6559b99378899b2c435d5f4baa66f137b
7
+ data.tar.gz: 75a358762a36f9d37881fe9b265764cb4b8f9578c6598dd38bd3f7323b919e90c1531b7dda769b687e29eb595ec7852cc5698254a80dd5a5a334929894e783df
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.9] - 2023-05-19
4
+ - 🚚 Loaders
5
+ - Introduce `Loaders::Docx` to parse .docx files
6
+
3
7
  ## [0.3.8] - 2023-05-19
4
8
  - 🔍 Vectorsearch
5
9
  - Introduce support for Chroma DB
data/Gemfile CHANGED
@@ -10,3 +10,7 @@ gem "rake", "~> 13.0"
10
10
  gem "rspec", "~> 3.0"
11
11
 
12
12
  gem "standardrb"
13
+
14
+ # TODO: Remove this once a new version of the gem is released
15
+ # The new version of `docx` needs to include this fix: https://github.com/ruby-docx/docx/pull/130
16
+ gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"
data/Gemfile.lock CHANGED
@@ -1,7 +1,16 @@
1
+ GIT
2
+ remote: https://github.com/ruby-docx/docx.git
3
+ revision: 08b734da349d980d2d7549b907459a9e2aa5d3bb
4
+ branch: master
5
+ specs:
6
+ docx (0.7.0)
7
+ nokogiri (~> 1.13, >= 1.13.0)
8
+ rubyzip (~> 2.0)
9
+
1
10
  PATH
2
11
  remote: .
3
12
  specs:
4
- langchainrb (0.3.8)
13
+ langchainrb (0.3.9)
5
14
 
6
15
  GEM
7
16
  remote: https://rubygems.org/
@@ -230,6 +239,7 @@ GEM
230
239
  ruby-progressbar (1.13.0)
231
240
  ruby-rc4 (0.1.5)
232
241
  ruby2_keywords (0.0.5)
242
+ rubyzip (2.3.2)
233
243
  standard (1.28.2)
234
244
  language_server-protocol (~> 3.17.0.2)
235
245
  lint_roller (~> 1.0)
@@ -267,6 +277,7 @@ PLATFORMS
267
277
  DEPENDENCIES
268
278
  chroma-db (~> 0.3.0)
269
279
  cohere-ruby (~> 0.9.3)
280
+ docx!
270
281
  dotenv-rails (~> 2.7.6)
271
282
  eqn (~> 1.6.5)
272
283
  google_search_results (~> 2.0.0)
data/lib/langchain.rb CHANGED
@@ -55,6 +55,7 @@ end
55
55
 
56
56
  module Loaders
57
57
  autoload :Base, "loaders/base"
58
+ autoload :Docx, "loaders/docx"
58
59
  autoload :PDF, "loaders/pdf"
59
60
  autoload :Text, "loaders/text"
60
61
  end
@@ -62,4 +63,4 @@ end
62
63
  autoload :Loader, "loader"
63
64
 
64
65
  # Load the default Loaders
65
- Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF]
66
+ Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF, ::Loaders::Docx]
data/lib/loaders/base.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # TODO: Add chunking options to the loaders
4
+
3
5
  module Loaders
4
6
  class Base
5
7
  def self.load(path)
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Loaders
4
+ class Docx < Base
5
+ #
6
+ # This Loader parses Docx files into text.
7
+ # If you'd like to use it directly you can do so like this:
8
+ # Loaders::Docx.new("path/to/my.docx").load
9
+ #
10
+ # This parser is also invoked when you're adding data to a Vectorsearch DB:
11
+ # qdrant = Vectorsearch::Qdrant.new(...)
12
+ # path = Langchain.root.join("path/to/my.docx")
13
+ # qdrant.add_data(path: path)
14
+ #
15
+
16
+ def initialize(path)
17
+ depends_on "docx"
18
+ require "docx"
19
+
20
+ @path = path
21
+ end
22
+
23
+ # Check that the file is a `.docx` file
24
+ def loadable?
25
+ @path.to_s.end_with?(".docx")
26
+ end
27
+
28
+ def load
29
+ ::Docx::Document
30
+ .open(@path.to_s)
31
+ .text
32
+ end
33
+ end
34
+ end
data/lib/loaders/pdf.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loaders
2
4
  class PDF < Base
3
5
  #
data/lib/loaders/text.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loaders
2
4
  class Text < Base
3
5
  #
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.8"
4
+ VERSION = "0.3.9"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.8
4
+ version: 0.3.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: 0.3.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: docx
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: eqn
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -238,6 +252,7 @@ files:
238
252
  - lib/llm/openai.rb
239
253
  - lib/loader.rb
240
254
  - lib/loaders/base.rb
255
+ - lib/loaders/docx.rb
241
256
  - lib/loaders/pdf.rb
242
257
  - lib/loaders/text.rb
243
258
  - lib/prompt/base.rb