langchainrb 0.3.8 → 0.3.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +12 -1
- data/lib/langchain.rb +2 -1
- data/lib/loaders/base.rb +2 -0
- data/lib/loaders/docx.rb +34 -0
- data/lib/loaders/pdf.rb +2 -0
- data/lib/loaders/text.rb +2 -0
- data/lib/version.rb +1 -1
- metadata +16 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fa4976937d6ccf9b2c0e1f0e0d98f53a98f21f0d9d1abbe85d6cc83b646cb8d7
|
4
|
+
data.tar.gz: e563cf1aaece650c2cbe2727b06b15c2e284276c78f1e820070d566a6d3b7386
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 70e086ce4951b95f1f232b436354f2566caedb0d200b55e3f7ed3bd9589043b06053e78198f97ed47335825edae528e6559b99378899b2c435d5f4baa66f137b
|
7
|
+
data.tar.gz: 75a358762a36f9d37881fe9b265764cb4b8f9578c6598dd38bd3f7323b919e90c1531b7dda769b687e29eb595ec7852cc5698254a80dd5a5a334929894e783df
|
data/CHANGELOG.md
CHANGED
data/Gemfile
CHANGED
@@ -10,3 +10,7 @@ gem "rake", "~> 13.0"
|
|
10
10
|
gem "rspec", "~> 3.0"
|
11
11
|
|
12
12
|
gem "standardrb"
|
13
|
+
|
14
|
+
# TODO: Remove this once a new version of the gem is released
|
15
|
+
# The new version of `docx` needs to include this fix: https://github.com/ruby-docx/docx/pull/130
|
16
|
+
gem "docx", branch: "master", git: "https://github.com/ruby-docx/docx.git"
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,16 @@
|
|
1
|
+
GIT
|
2
|
+
remote: https://github.com/ruby-docx/docx.git
|
3
|
+
revision: 08b734da349d980d2d7549b907459a9e2aa5d3bb
|
4
|
+
branch: master
|
5
|
+
specs:
|
6
|
+
docx (0.7.0)
|
7
|
+
nokogiri (~> 1.13, >= 1.13.0)
|
8
|
+
rubyzip (~> 2.0)
|
9
|
+
|
1
10
|
PATH
|
2
11
|
remote: .
|
3
12
|
specs:
|
4
|
-
langchainrb (0.3.8)
|
13
|
+
langchainrb (0.3.9)
|
5
14
|
|
6
15
|
GEM
|
7
16
|
remote: https://rubygems.org/
|
@@ -230,6 +239,7 @@ GEM
|
|
230
239
|
ruby-progressbar (1.13.0)
|
231
240
|
ruby-rc4 (0.1.5)
|
232
241
|
ruby2_keywords (0.0.5)
|
242
|
+
rubyzip (2.3.2)
|
233
243
|
standard (1.28.2)
|
234
244
|
language_server-protocol (~> 3.17.0.2)
|
235
245
|
lint_roller (~> 1.0)
|
@@ -267,6 +277,7 @@ PLATFORMS
|
|
267
277
|
DEPENDENCIES
|
268
278
|
chroma-db (~> 0.3.0)
|
269
279
|
cohere-ruby (~> 0.9.3)
|
280
|
+
docx!
|
270
281
|
dotenv-rails (~> 2.7.6)
|
271
282
|
eqn (~> 1.6.5)
|
272
283
|
google_search_results (~> 2.0.0)
|
data/lib/langchain.rb
CHANGED
@@ -55,6 +55,7 @@ end
|
|
55
55
|
|
56
56
|
module Loaders
|
57
57
|
autoload :Base, "loaders/base"
|
58
|
+
autoload :Docx, "loaders/docx"
|
58
59
|
autoload :PDF, "loaders/pdf"
|
59
60
|
autoload :Text, "loaders/text"
|
60
61
|
end
|
@@ -62,4 +63,4 @@ end
|
|
62
63
|
autoload :Loader, "loader"
|
63
64
|
|
64
65
|
# Load the default Loaders
|
65
|
-
Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF]
|
66
|
+
Langchain.default_loaders ||= [::Loaders::Text, ::Loaders::PDF, ::Loaders::Docx]
|
data/lib/loaders/base.rb
CHANGED
data/lib/loaders/docx.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Loaders
|
4
|
+
class Docx < Base
|
5
|
+
#
|
6
|
+
# This Loader parses Docx files into text.
|
7
|
+
# If you'd like to use it directly you can do so like this:
|
8
|
+
# Loaders::Docx.new("path/to/my.docx").load
|
9
|
+
#
|
10
|
+
# This parser is also invoked when you're adding data to a Vectorsearch DB:
|
11
|
+
# qdrant = Vectorsearch::Qdrant.new(...)
|
12
|
+
# path = Langchain.root.join("path/to/my.docx")
|
13
|
+
# qdrant.add_data(path: path)
|
14
|
+
#
|
15
|
+
|
16
|
+
def initialize(path)
|
17
|
+
depends_on "docx"
|
18
|
+
require "docx"
|
19
|
+
|
20
|
+
@path = path
|
21
|
+
end
|
22
|
+
|
23
|
+
# Check that the file is a `.docx` file
|
24
|
+
def loadable?
|
25
|
+
@path.to_s.end_with?(".docx")
|
26
|
+
end
|
27
|
+
|
28
|
+
def load
|
29
|
+
::Docx::Document
|
30
|
+
.open(@path.to_s)
|
31
|
+
.text
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/lib/loaders/pdf.rb
CHANGED
data/lib/loaders/text.rb
CHANGED
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: langchainrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.8
|
4
|
+
version: 0.3.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrei Bondarev
|
@@ -66,6 +66,20 @@ dependencies:
|
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: 0.3.0
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: docx
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
69
83
|
- !ruby/object:Gem::Dependency
|
70
84
|
name: eqn
|
71
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -238,6 +252,7 @@ files:
|
|
238
252
|
- lib/llm/openai.rb
|
239
253
|
- lib/loader.rb
|
240
254
|
- lib/loaders/base.rb
|
255
|
+
- lib/loaders/docx.rb
|
241
256
|
- lib/loaders/pdf.rb
|
242
257
|
- lib/loaders/text.rb
|
243
258
|
- lib/prompt/base.rb
|