simple_text_extract 3.0.1 → 3.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4d97ba86e11c6ad6e14e3e2af600eddc00378747a693a51be38a15da7c73b9b5
4
- data.tar.gz: 401dd3aee14d95cc750397bc7b9e6393dbb49eb45d9369d39096a5a6046cb0d8
3
+ metadata.gz: ddd2289191545f08129aef5ff67b85fe9f2a9c1d1fbe09a377d13afb9203031f
4
+ data.tar.gz: 99b7a2cb12381cd636115ea350ec4c4c4875e1260ddb7770355a9c6ab0ccf962
5
5
  SHA512:
6
- metadata.gz: 76156def8c686dca8dba70e7ac36fb58a65757610c7cdb9bfd73c9a13c161d64db1730a21bcf5746a75398cfea2bc1a47e52acfd2e038c414e1a102df05b7854
7
- data.tar.gz: f327f06da4692531b051e291f20529889e123c508f15643ade8486819b2ac91ab04bde79b1ed5b6091d3bf8e28c3895fcdbf3cf1148367c16fb2743190aab159
6
+ metadata.gz: e91131e4458cacd400b751c22dd5d5cf8524ffc4724b4d499fbc522932eca4bde81a507b64ff818382ee620843b9d960921f0f72f970dbf20b09d5315e33515b
7
+ data.tar.gz: ee5c900bd6ac6cf3f12139976f27aab73dab455d8c73f1fe7e3082291b66a95d31efcaa749225f3f6258a9445539299703e0dde52a0009c8f6a5bd6f75047674
data/- ADDED
@@ -0,0 +1 @@
1
+ Test
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 3.0.3 (2023-04-27)
2
+
3
+ - Add support for extracting text from tables in DOCX files.
4
+
5
+ ## 3.0.2 (2023-04-17)
6
+
7
+ - Coerces filename in `SimpleTextExtract.supports?(filename:)` to string.
8
+
1
9
  ## 3.0.1 (2023-04-17)
2
10
 
3
11
  - Fixes printing of Roo::Excelx::Cell::Empty for empty rows
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- simple_text_extract (3.0.1)
4
+ simple_text_extract (3.0.3)
5
5
  roo (~> 2.10.0)
6
6
  rubyzip (~> 2.3.2)
7
7
  spreadsheet (~> 1.3.0)
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class SimpleTextExtract::Extract
3
+ class SimpleTextExtract::Extract # rubocop:disable Metrics/ClassLength
4
4
  def self.formatter(path)
5
5
  case path
6
6
  when /.zip$/i
@@ -133,6 +133,13 @@ class SimpleTextExtract::Extract
133
133
  doc.xpath("//w:document//w:body/w:p").each do |node|
134
134
  result << node.text
135
135
  end
136
+
137
+ doc.xpath("//w:document//w:body//w:tbl").each do |node|
138
+ node.xpath(".//w:tr").each do |row|
139
+ text = row.xpath("w:tc").map(&:text)
140
+ result << text.join(", ")
141
+ end
142
+ end
136
143
  end
137
144
 
138
145
  result.join("\n")
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleTextExtract
4
- VERSION = "3.0.1"
4
+ VERSION = "3.0.3"
5
5
  end
@@ -13,7 +13,7 @@ module SimpleTextExtract
13
13
  end
14
14
 
15
15
  def self.supports?(filename: nil)
16
- SUPPORTED_FILETYPES.include?(filename.split(".").last)
16
+ SUPPORTED_FILETYPES.include?(filename.to_s.split(".").last)
17
17
  end
18
18
 
19
19
  def self.missing_dependency?(command)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_text_extract
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.1
4
+ version: 3.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nick Weiland
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-04-17 00:00:00.000000000 Z
11
+ date: 2023-04-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: roo
@@ -59,6 +59,7 @@ executables: []
59
59
  extensions: []
60
60
  extra_rdoc_files: []
61
61
  files:
62
+ - "-"
62
63
  - ".github/workflows/build.yml"
63
64
  - ".gitignore"
64
65
  - ".rubocop.yml"