simple_text_extract 3.0.2 → 3.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 388ec404e856a47441f3bb3e23873d70eff4fabf86d96afcfeb6f7ad324e262d
4
- data.tar.gz: 054f2cdb252b91ae14bdc678af1b75d433bfc6266d68ccace27ce4ec829c6484
3
+ metadata.gz: ddd2289191545f08129aef5ff67b85fe9f2a9c1d1fbe09a377d13afb9203031f
4
+ data.tar.gz: 99b7a2cb12381cd636115ea350ec4c4c4875e1260ddb7770355a9c6ab0ccf962
5
5
  SHA512:
6
- metadata.gz: 5c5a9d4f02bd00f32495803f60b2552d084f204a39df5c239d29e9be2c04013ae44b865c805c750c3a52a51a5e9b93a358e97d386a351aa16460664e048bac61
7
- data.tar.gz: f7bc4192659abaf6653f58c01b446e11e483dddad2efb91ebf077a88767346f50997d87ba8af5d4ab26e7ac2ed7bf6a4393fecf75b78b1968662a266e251a84e
6
+ metadata.gz: e91131e4458cacd400b751c22dd5d5cf8524ffc4724b4d499fbc522932eca4bde81a507b64ff818382ee620843b9d960921f0f72f970dbf20b09d5315e33515b
7
+ data.tar.gz: ee5c900bd6ac6cf3f12139976f27aab73dab455d8c73f1fe7e3082291b66a95d31efcaa749225f3f6258a9445539299703e0dde52a0009c8f6a5bd6f75047674
data/- ADDED
@@ -0,0 +1 @@
1
+ Test
data/CHANGELOG.md CHANGED
@@ -1,4 +1,8 @@
1
- ## 3.0.1 (2023-04-17)
1
+ ## 3.0.3 (2023-04-27)
2
+
3
+ - Add support for extracting text from tables in DOCX files.
4
+
5
+ ## 3.0.2 (2023-04-17)
2
6
 
3
7
  - Coerces filename in `SimpleTextExtract.supports?(filename:)` to string.
4
8
 
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- simple_text_extract (3.0.2)
4
+ simple_text_extract (3.0.3)
5
5
  roo (~> 2.10.0)
6
6
  rubyzip (~> 2.3.2)
7
7
  spreadsheet (~> 1.3.0)
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class SimpleTextExtract::Extract
3
+ class SimpleTextExtract::Extract # rubocop:disable Metrics/ClassLength
4
4
  def self.formatter(path)
5
5
  case path
6
6
  when /.zip$/i
@@ -133,6 +133,13 @@ class SimpleTextExtract::Extract
133
133
  doc.xpath("//w:document//w:body/w:p").each do |node|
134
134
  result << node.text
135
135
  end
136
+
137
+ doc.xpath("//w:document//w:body//w:tbl").each do |node|
138
+ node.xpath(".//w:tr").each do |row|
139
+ text = row.xpath("w:tc").map(&:text)
140
+ result << text.join(", ")
141
+ end
142
+ end
136
143
  end
137
144
 
138
145
  result.join("\n")
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleTextExtract
4
- VERSION = "3.0.2"
4
+ VERSION = "3.0.3"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_text_extract
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.2
4
+ version: 3.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nick Weiland
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-04-18 00:00:00.000000000 Z
11
+ date: 2023-04-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: roo
@@ -59,6 +59,7 @@ executables: []
59
59
  extensions: []
60
60
  extra_rdoc_files: []
61
61
  files:
62
+ - "-"
62
63
  - ".github/workflows/build.yml"
63
64
  - ".gitignore"
64
65
  - ".rubocop.yml"