simple_text_extract 3.0.5 → 3.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 48a3c8805698e6f4af386789e4b4f11e9a3e38425bb1c2ce467ec7cefa99f2ba
4
- data.tar.gz: f66a75c47984d63cf41b4f09f8c874f94da1f2928dddf268b4aac5ce48413f85
3
+ metadata.gz: bad3e0fab883d324b24e9a06cacdceed53de65e51489be88557a3dcdc5bf39b6
4
+ data.tar.gz: 2c8b53df878e404f90ca65eacae7ce25bc1663de7f138bed7c8ee744b13ee456
5
5
  SHA512:
6
- metadata.gz: 5c8a892a0916945062f298c1b78728f62962ccca8404c9aabc7724db73b0806f00c1074bac7e13a94d390c7b67d19b7f5398ef96ea6ef41439ea66ba4bf78d3b
7
- data.tar.gz: f0de6277f88c6debeaaac4bea385b3ba6e4d9180f53e5a2cef288ce5f80a30e92f67ad7e46c08aa908fb5ee30ccaffb1aaf0ea5ac4560cb2aa8262c8a610ce05
6
+ metadata.gz: a2e1d6659ddfd9cf3252afc481db1077499ecfab0b4602b52555d6f5f18a29b7a7bbeb671009a0505be547a5ad553accbebf037df3f5a4aec1f9673fc4ff0069
7
+ data.tar.gz: c3363629bf98d6fd55ac380830c2b7721903361a021e0ee10c5ddf33566be27199dda7e569cdc6821110a2200d3804d62f85d936a99165b58584df52c955afe1
@@ -1,14 +1,20 @@
1
1
  name: build
2
- on: [push, pull_request]
2
+ on:
3
+ push:
4
+ branches:
5
+ - master
6
+ pull_request:
7
+ branches:
8
+ - master
3
9
  jobs:
4
10
  build:
5
11
  strategy:
6
12
  fail-fast: false
7
13
  matrix:
8
14
  include:
9
- - ruby: 3.2
15
+ - ruby: 3.3
10
16
  gemfile: Gemfile
11
- - ruby: 3.1
17
+ - ruby: 3.2
12
18
  gemfile: Gemfile
13
19
  runs-on: ubuntu-latest
14
20
  env:
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.2.2
1
+ 3.3.4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 3.0.6 (2024-07-12)
2
+
3
+ - Stop depending on Active::Support (#present?)
4
+
1
5
  ## 3.0.3 (2023-04-27)
2
6
 
3
7
  - Add support for extracting text from tables in DOCX files.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- simple_text_extract (3.0.5)
4
+ simple_text_extract (3.0.7)
5
5
  roo (~> 2.10.0)
6
6
  rubyzip (~> 2.3.2)
7
7
  spreadsheet (~> 1.3.0)
@@ -10,54 +10,56 @@ GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
12
  ast (2.4.2)
13
- base64 (0.1.1)
13
+ bigdecimal (3.1.8)
14
14
  coderay (1.1.3)
15
- json (2.6.3)
15
+ json (2.7.2)
16
16
  language_server-protocol (3.17.0.3)
17
- memory_profiler (1.0.1)
18
- method_source (1.0.0)
19
- minitest (5.20.0)
20
- mocha (2.1.0)
17
+ memory_profiler (1.0.2)
18
+ method_source (1.1.0)
19
+ minitest (5.24.1)
20
+ mocha (2.4.0)
21
21
  ruby2_keywords (>= 0.0.5)
22
- nokogiri (1.15.4-arm64-darwin)
22
+ nokogiri (1.16.6-arm64-darwin)
23
23
  racc (~> 1.4)
24
- nokogiri (1.15.4-x86_64-linux)
24
+ nokogiri (1.16.6-x86_64-linux)
25
25
  racc (~> 1.4)
26
- parallel (1.23.0)
27
- parser (3.2.2.4)
26
+ parallel (1.25.1)
27
+ parser (3.3.4.0)
28
28
  ast (~> 2.4.1)
29
29
  racc
30
30
  pry (0.14.2)
31
31
  coderay (~> 1.1)
32
32
  method_source (~> 1.0)
33
- racc (1.7.1)
33
+ racc (1.8.0)
34
34
  rainbow (3.1.1)
35
- rake (13.0.6)
36
- regexp_parser (2.8.2)
37
- rexml (3.2.6)
38
- roo (2.10.0)
35
+ rake (13.2.1)
36
+ regexp_parser (2.9.2)
37
+ rexml (3.3.1)
38
+ strscan
39
+ roo (2.10.1)
39
40
  nokogiri (~> 1)
40
41
  rubyzip (>= 1.3.0, < 3.0.0)
41
- rubocop (1.57.1)
42
- base64 (~> 0.1.1)
42
+ rubocop (1.65.0)
43
43
  json (~> 2.3)
44
44
  language_server-protocol (>= 3.17.0)
45
45
  parallel (~> 1.10)
46
- parser (>= 3.2.2.4)
46
+ parser (>= 3.3.0.2)
47
47
  rainbow (>= 2.2.2, < 4.0)
48
- regexp_parser (>= 1.8, < 3.0)
48
+ regexp_parser (>= 2.4, < 3.0)
49
49
  rexml (>= 3.2.5, < 4.0)
50
- rubocop-ast (>= 1.28.1, < 2.0)
50
+ rubocop-ast (>= 1.31.1, < 2.0)
51
51
  ruby-progressbar (~> 1.7)
52
52
  unicode-display_width (>= 2.4.0, < 3.0)
53
- rubocop-ast (1.29.0)
54
- parser (>= 3.2.1.0)
55
- ruby-ole (1.2.12.2)
53
+ rubocop-ast (1.31.3)
54
+ parser (>= 3.3.1.0)
55
+ ruby-ole (1.2.13.1)
56
56
  ruby-progressbar (1.13.0)
57
57
  ruby2_keywords (0.0.5)
58
58
  rubyzip (2.3.2)
59
- spreadsheet (1.3.0)
59
+ spreadsheet (1.3.1)
60
+ bigdecimal
60
61
  ruby-ole
62
+ strscan (3.1.0)
61
63
  unicode-display_width (2.5.0)
62
64
 
63
65
  PLATFORMS
@@ -75,4 +77,4 @@ DEPENDENCIES
75
77
  simple_text_extract!
76
78
 
77
79
  BUNDLED WITH
78
- 2.4.10
80
+ 2.5.11
@@ -78,7 +78,7 @@ class SimpleTextExtract::Extract # rubocop:disable Metrics/ClassLength
78
78
  def pdf_extract
79
79
  return nil if SimpleTextExtract.missing_dependency?("pdftotext")
80
80
 
81
- `pdftotext #{Shellwords.escape(file.path)} -`
81
+ `pdftotext #{Shellwords.escape(file.path)} - 2>/dev/null`
82
82
  end
83
83
 
84
84
  def xlsx_extract
@@ -93,7 +93,7 @@ class SimpleTextExtract::Extract # rubocop:disable Metrics/ClassLength
93
93
  text << "# Sheet Name: #{name}"
94
94
 
95
95
  spreadsheet.sheet(name)&.each_row_streaming do |row|
96
- text << row.filter(&:present?).join(" ")
96
+ text << row.map(&:to_s).join(" ")
97
97
  end
98
98
  end
99
99
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleTextExtract
4
- VERSION = "3.0.5"
4
+ VERSION = "3.0.7"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_text_extract
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.5
4
+ version: 3.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nick Weiland
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-23 00:00:00.000000000 Z
11
+ date: 2024-07-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: roo
@@ -60,7 +60,7 @@ extensions: []
60
60
  extra_rdoc_files: []
61
61
  files:
62
62
  - "-"
63
- - ".github/workflows/build.yml"
63
+ - ".github/workflows/test.yml"
64
64
  - ".gitignore"
65
65
  - ".rubocop.yml"
66
66
  - ".ruby-version"
@@ -97,7 +97,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
97
97
  requirements:
98
98
  - antiword
99
99
  - pdftotext/poppler
100
- rubygems_version: 3.4.10
100
+ rubygems_version: 3.5.15
101
101
  signing_key:
102
102
  specification_version: 4
103
103
  summary: Extract text from various file types before resorting to an OCR solution.