simple_text_extract 3.0.1 → 3.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/- +1 -0
- data/CHANGELOG.md +8 -0
- data/Gemfile.lock +1 -1
- data/lib/simple_text_extract/extract.rb +8 -1
- data/lib/simple_text_extract/version.rb +1 -1
- data/lib/simple_text_extract.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ddd2289191545f08129aef5ff67b85fe9f2a9c1d1fbe09a377d13afb9203031f
|
4
|
+
data.tar.gz: 99b7a2cb12381cd636115ea350ec4c4c4875e1260ddb7770355a9c6ab0ccf962
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e91131e4458cacd400b751c22dd5d5cf8524ffc4724b4d499fbc522932eca4bde81a507b64ff818382ee620843b9d960921f0f72f970dbf20b09d5315e33515b
|
7
|
+
data.tar.gz: ee5c900bd6ac6cf3f12139976f27aab73dab455d8c73f1fe7e3082291b66a95d31efcaa749225f3f6258a9445539299703e0dde52a0009c8f6a5bd6f75047674
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
## 3.0.3 (2023-04-27)
|
2
|
+
|
3
|
+
- Add support for extracting text from tables in DOCX files.
|
4
|
+
|
5
|
+
## 3.0.2 (2023-04-17)
|
6
|
+
|
7
|
+
- Coerces filename in `SimpleTextExtract.supports?(filename:)` to string.
|
8
|
+
|
1
9
|
## 3.0.1 (2023-04-17)
|
2
10
|
|
3
11
|
- Fixes printing of Roo::Excelx::Cell::Empty for empty rows
|
data/Gemfile.lock
CHANGED
data/lib/simple_text_extract/extract.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
class SimpleTextExtract::Extract
|
3
|
+
class SimpleTextExtract::Extract # rubocop:disable Metrics/ClassLength
|
4
4
|
def self.formatter(path)
|
5
5
|
case path
|
6
6
|
when /.zip$/i
|
@@ -133,6 +133,13 @@ class SimpleTextExtract::Extract
|
|
133
133
|
doc.xpath("//w:document//w:body/w:p").each do |node|
|
134
134
|
result << node.text
|
135
135
|
end
|
136
|
+
|
137
|
+
doc.xpath("//w:document//w:body//w:tbl").each do |node|
|
138
|
+
node.xpath(".//w:tr").each do |row|
|
139
|
+
text = row.xpath("w:tc").map(&:text)
|
140
|
+
result << text.join(", ")
|
141
|
+
end
|
142
|
+
end
|
136
143
|
end
|
137
144
|
|
138
145
|
result.join("\n")
|
data/lib/simple_text_extract.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_text_extract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.1
|
4
|
+
version: 3.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nick Weiland
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-04-17 00:00:00.000000000 Z
|
11
|
+
date: 2023-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: roo
|
@@ -59,6 +59,7 @@ executables: []
|
|
59
59
|
extensions: []
|
60
60
|
extra_rdoc_files: []
|
61
61
|
files:
|
62
|
+
- "-"
|
62
63
|
- ".github/workflows/build.yml"
|
63
64
|
- ".gitignore"
|
64
65
|
- ".rubocop.yml"
|