simple_text_extract 3.0.2 → 3.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 388ec404e856a47441f3bb3e23873d70eff4fabf86d96afcfeb6f7ad324e262d
4
- data.tar.gz: 054f2cdb252b91ae14bdc678af1b75d433bfc6266d68ccace27ce4ec829c6484
3
+ metadata.gz: ddd2289191545f08129aef5ff67b85fe9f2a9c1d1fbe09a377d13afb9203031f
4
+ data.tar.gz: 99b7a2cb12381cd636115ea350ec4c4c4875e1260ddb7770355a9c6ab0ccf962
5
5
  SHA512:
6
- metadata.gz: 5c5a9d4f02bd00f32495803f60b2552d084f204a39df5c239d29e9be2c04013ae44b865c805c750c3a52a51a5e9b93a358e97d386a351aa16460664e048bac61
7
- data.tar.gz: f7bc4192659abaf6653f58c01b446e11e483dddad2efb91ebf077a88767346f50997d87ba8af5d4ab26e7ac2ed7bf6a4393fecf75b78b1968662a266e251a84e
6
+ metadata.gz: e91131e4458cacd400b751c22dd5d5cf8524ffc4724b4d499fbc522932eca4bde81a507b64ff818382ee620843b9d960921f0f72f970dbf20b09d5315e33515b
7
+ data.tar.gz: ee5c900bd6ac6cf3f12139976f27aab73dab455d8c73f1fe7e3082291b66a95d31efcaa749225f3f6258a9445539299703e0dde52a0009c8f6a5bd6f75047674
data/- ADDED
@@ -0,0 +1 @@
1
+ Test
data/CHANGELOG.md CHANGED
@@ -1,4 +1,8 @@
1
- ## 3.0.1 (2023-04-17)
1
+ ## 3.0.3 (2023-04-27)
2
+
3
+ - Add support for extracting text from tables in DOCX files.
4
+
5
+ ## 3.0.2 (2023-04-17)
2
6
 
3
7
  - Coerces filename in `SimpleTextExtract.supports?(filename:)` to string.
4
8
 
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- simple_text_extract (3.0.2)
4
+ simple_text_extract (3.0.3)
5
5
  roo (~> 2.10.0)
6
6
  rubyzip (~> 2.3.2)
7
7
  spreadsheet (~> 1.3.0)
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- class SimpleTextExtract::Extract
3
+ class SimpleTextExtract::Extract # rubocop:disable Metrics/ClassLength
4
4
  def self.formatter(path)
5
5
  case path
6
6
  when /.zip$/i
@@ -133,6 +133,13 @@ class SimpleTextExtract::Extract
133
133
  doc.xpath("//w:document//w:body/w:p").each do |node|
134
134
  result << node.text
135
135
  end
136
+
137
+ doc.xpath("//w:document//w:body//w:tbl").each do |node|
138
+ node.xpath(".//w:tr").each do |row|
139
+ text = row.xpath("w:tc").map(&:text)
140
+ result << text.join(", ")
141
+ end
142
+ end
136
143
  end
137
144
 
138
145
  result.join("\n")
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleTextExtract
4
- VERSION = "3.0.2"
4
+ VERSION = "3.0.3"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_text_extract
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.2
4
+ version: 3.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nick Weiland
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-04-18 00:00:00.000000000 Z
11
+ date: 2023-04-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: roo
@@ -59,6 +59,7 @@ executables: []
59
59
  extensions: []
60
60
  extra_rdoc_files: []
61
61
  files:
62
+ - "-"
62
63
  - ".github/workflows/build.yml"
63
64
  - ".gitignore"
64
65
  - ".rubocop.yml"