simple_text_extract 3.0.2 → 3.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/- +1 -0
- data/CHANGELOG.md +5 -1
- data/Gemfile.lock +1 -1
- data/lib/simple_text_extract/extract.rb +9 -2
- data/lib/simple_text_extract/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1bcf42e8ea86d7b9bb1f14731cbf2219e5d2a3c9bca5288e0441776b9822a835
|
4
|
+
data.tar.gz: 3d17a5ec43537ea50c64b8b07237cd21125794084fa964a121267895b5ba5023
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7f452ddf56c6d464f4d16de10e4c934bd97b78e6adbedb43b163a5511881b81d466f3064e963410957b194420f74c0c6b554356b411f03845b37c417c845244f
|
7
|
+
data.tar.gz: be9d14077b31539fe14aec768cea5669958c922f0360eb8125e9289a8df61a36a6a2574b7ea1458eed010cf474ef80d70e7a601083016a0d968e37417c314c30
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/lib/simple_text_extract/extract.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
class SimpleTextExtract::Extract
|
3
|
+
class SimpleTextExtract::Extract # rubocop:disable Metrics/ClassLength
|
4
4
|
def self.formatter(path)
|
5
5
|
case path
|
6
6
|
when /.zip$/i
|
@@ -28,7 +28,7 @@ class SimpleTextExtract::Extract
|
|
28
28
|
end
|
29
29
|
|
30
30
|
def to_s
|
31
|
-
@to_s ||= extract.to_s.gsub(/[^\S\n]+/, " ").gsub(/\s?\n\s+/, "\n").strip
|
31
|
+
@to_s ||= extract.to_s.scrub.gsub(/[^\S\n]+/, " ").gsub(/\s?\n\s+/, "\n").strip
|
32
32
|
end
|
33
33
|
|
34
34
|
private
|
@@ -133,6 +133,13 @@ class SimpleTextExtract::Extract
|
|
133
133
|
doc.xpath("//w:document//w:body/w:p").each do |node|
|
134
134
|
result << node.text
|
135
135
|
end
|
136
|
+
|
137
|
+
doc.xpath("//w:document//w:body//w:tbl").each do |node|
|
138
|
+
node.xpath(".//w:tr").each do |row|
|
139
|
+
text = row.xpath("w:tc").map(&:text)
|
140
|
+
result << text.join(", ")
|
141
|
+
end
|
142
|
+
end
|
136
143
|
end
|
137
144
|
|
138
145
|
result.join("\n")
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_text_extract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.2
|
4
|
+
version: 3.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nick Weiland
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-06-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: roo
|
@@ -59,6 +59,7 @@ executables: []
|
|
59
59
|
extensions: []
|
60
60
|
extra_rdoc_files: []
|
61
61
|
files:
|
62
|
+
- "-"
|
62
63
|
- ".github/workflows/build.yml"
|
63
64
|
- ".gitignore"
|
64
65
|
- ".rubocop.yml"
|