langchainrb 0.3.8 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6b208f5fc51ce342bd7ffcfb776487452a40fb0505e4fa6a6b371e0db1d2a278
4
- data.tar.gz: 8551edf0406827f92026c8fde54b3b27f32727dec6381f5a33cd58c9c39d40a5
3
+ metadata.gz: fa4976937d6ccf9b2c0e1f0e0d98f53a98f21f0d9d1abbe85d6cc83b646cb8d7
4
+ data.tar.gz: e563cf1aaece650c2cbe2727b06b15c2e284276c78f1e820070d566a6d3b7386
5
5
  SHA512:
6
- metadata.gz: 0d0d10e84dd47b768979e4f004e9026aac48c45ed5e15ffe499dc0fc9679e806408cc5688cdbd06931e7f63e8840dbb33b5ad7f58ca311eb05a4528757fc9581
7
- data.tar.gz: 8723656cefc802cdd4464d24f452a858a1315e654d64d1c256cab9e1de5297c1de0950a4a625278fe33aa8f149db698878bfe608cd06051bc0f8eb8c5abb22f3
6
+ metadata.gz: 70e086ce4951b95f1f232b436354f2566caedb0d200b55e3f7ed3bd9589043b06053e78198f97ed47335825edae528e6559b99378899b2c435d5f4baa66f137b
7
+ data.tar.gz: 75a358762a36f9d37881fe9b265764cb4b8f9578c6598dd38bd3f7323b919e90c1531b7dda769b687e29eb595ec7852cc5698254a80dd5a5a334929894e783df
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.3.9] - 2023-05-19
4
+ - 🚚 Loaders
5
+ - Introduce `Loaders::Docx` to parse .docx files
6
+
3
7
  ## [0.3.8] - 2023-05-19
4
8
  - 🔍 Vectorsearch
5
9
  - Introduce support for Chroma DB
data/Gemfile CHANGED
@@ -10,3 +10,7 @@ gem "rake", "~> 13.0"
10
10
  gem "rspec", "~> 3.0"
11
11
 
12
12
  gem "standardrb"
13
+
14
+ # TODO: Remove this once a new version of the gem is released
15
+ # The new version of `docx` needs to include this fix: https://github.com/ruby-docx/docx/pull/130
16
+ gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"
data/Gemfile.lock CHANGED
@@ -1,7 +1,16 @@
1
+ GIT
2
+ remote: https://github.com/ruby-docx/docx.git
3
+ revision: 08b734da349d980d2d7549b907459a9e2aa5d3bb
4
+ branch: master
5
+ specs:
6
+ docx (0.7.0)
7
+ nokogiri (~> 1.13, >= 1.13.0)
8
+ rubyzip (~> 2.0)
9
+
1
10
  PATH
2
11
  remote: .
3
12
  specs:
4
- langchainrb (0.3.8)
13
+ langchainrb (0.3.9)
5
14
 
6
15
  GEM
7
16
  remote: https://rubygems.org/
@@ -230,6 +239,7 @@ GEM
230
239
  ruby-progressbar (1.13.0)
231
240
  ruby-rc4 (0.1.5)
232
241
  ruby2_keywords (0.0.5)
242
+ rubyzip (2.3.2)
233
243
  standard (1.28.2)
234
244
  language_server-protocol (~> 3.17.0.2)
235
245
  lint_roller (~> 1.0)
@@ -267,6 +277,7 @@ PLATFORMS
267
277
  DEPENDENCIES
268
278
  chroma-db (~> 0.3.0)
269
279
  cohere-ruby (~> 0.9.3)
280
+ docx!
270
281
  dotenv-rails (~> 2.7.6)
271
282
  eqn (~> 1.6.5)
272
283
  google_search_results (~> 2.0.0)
data/lib/langchain.rb CHANGED
@@ -55,6 +55,7 @@ end
55
55
 
56
56
  module Loaders
57
57
  autoload :Base, "loaders/base"
58
+ autoload :Docx, "loaders/docx"
58
59
  autoload :PDF, "loaders/pdf"
59
60
  autoload :Text, "loaders/text"
60
61
  end
@@ -62,4 +63,4 @@ end
62
63
  autoload :Loader, "loader"
63
64
 
64
65
  # Load the default Loaders
65
- Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF]
66
+ Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF, ::Loaders::Docx]
data/lib/loaders/base.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ # TODO: Add chunking options to the loaders
4
+
3
5
  module Loaders
4
6
  class Base
5
7
  def self.load(path)
data/lib/loaders/docx.rb ADDED
@@ -0,0 +1,34 @@
# frozen_string_literal: true

module Loaders
  #
  # This Loader parses Docx files into text.
  # If you'd like to use it directly you can do so like this:
  #   Loaders::Docx.new("path/to/my.docx").load
  #
  # This parser is also invoked when you're adding data to a Vectorsearch DB:
  #   qdrant = Vectorsearch::Qdrant.new(...)
  #   path = Langchain.root.join("path/to/my.docx")
  #   qdrant.add_data(path: path)
  #
  class Docx < Base
    # @param path [#to_s] location of the document (e.g. a String or Pathname);
    #   it is only ever used through `to_s`
    def initialize(path)
      # The `docx` gem is loaded here rather than at file load time, so it is
      # only required once a Docx loader is actually instantiated.
      # NOTE(review): `depends_on` is defined outside this file; presumably it
      # verifies that the gem is available - confirm against its definition.
      depends_on "docx"
      require "docx"

      @path = path
    end

    # Check that the file is a `.docx` file.
    #
    # The extension is compared case-insensitively so that files such as
    # `REPORT.DOCX` are recognised as well.
    #
    # @return [Boolean] true when the path ends in `.docx` (any letter case)
    def loadable?
      @path.to_s.downcase.end_with?(".docx")
    end

    # Open the document at the given path and extract its text.
    #
    # @return the value of `::Docx::Document#text` for the opened document
    def load
      ::Docx::Document.open(@path.to_s).text
    end
  end
end
data/lib/loaders/pdf.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loaders
2
4
  class PDF < Base
3
5
  #
data/lib/loaders/text.rb CHANGED
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Loaders
2
4
  class Text < Base
3
5
  #
data/lib/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Langchain
4
- VERSION = "0.3.8"
4
+ VERSION = "0.3.9"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: langchainrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.8
4
+ version: 0.3.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrei Bondarev
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: 0.3.0
69
+ - !ruby/object:Gem::Dependency
70
+ name: docx
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: eqn
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -238,6 +252,7 @@ files:
238
252
  - lib/llm/openai.rb
239
253
  - lib/loader.rb
240
254
  - lib/loaders/base.rb
255
+ - lib/loaders/docx.rb
241
256
  - lib/loaders/pdf.rb
242
257
  - lib/loaders/text.rb
243
258
  - lib/prompt/base.rb