simple_text_extract 3.0.5 → 3.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 48a3c8805698e6f4af386789e4b4f11e9a3e38425bb1c2ce467ec7cefa99f2ba
4
- data.tar.gz: f66a75c47984d63cf41b4f09f8c874f94da1f2928dddf268b4aac5ce48413f85
3
+ metadata.gz: bad3e0fab883d324b24e9a06cacdceed53de65e51489be88557a3dcdc5bf39b6
4
+ data.tar.gz: 2c8b53df878e404f90ca65eacae7ce25bc1663de7f138bed7c8ee744b13ee456
5
5
  SHA512:
6
- metadata.gz: 5c8a892a0916945062f298c1b78728f62962ccca8404c9aabc7724db73b0806f00c1074bac7e13a94d390c7b67d19b7f5398ef96ea6ef41439ea66ba4bf78d3b
7
- data.tar.gz: f0de6277f88c6debeaaac4bea385b3ba6e4d9180f53e5a2cef288ce5f80a30e92f67ad7e46c08aa908fb5ee30ccaffb1aaf0ea5ac4560cb2aa8262c8a610ce05
6
+ metadata.gz: a2e1d6659ddfd9cf3252afc481db1077499ecfab0b4602b52555d6f5f18a29b7a7bbeb671009a0505be547a5ad553accbebf037df3f5a4aec1f9673fc4ff0069
7
+ data.tar.gz: c3363629bf98d6fd55ac380830c2b7721903361a021e0ee10c5ddf33566be27199dda7e569cdc6821110a2200d3804d62f85d936a99165b58584df52c955afe1
@@ -1,14 +1,20 @@
1
1
  name: build
2
- on: [push, pull_request]
2
+ on:
3
+ push:
4
+ branches:
5
+ - master
6
+ pull_request:
7
+ branches:
8
+ - master
3
9
  jobs:
4
10
  build:
5
11
  strategy:
6
12
  fail-fast: false
7
13
  matrix:
8
14
  include:
9
- - ruby: 3.2
15
+ - ruby: 3.3
10
16
  gemfile: Gemfile
11
- - ruby: 3.1
17
+ - ruby: 3.2
12
18
  gemfile: Gemfile
13
19
  runs-on: ubuntu-latest
14
20
  env:
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.2.2
1
+ 3.3.4
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 3.0.6 (2024-07-12)
2
+
3
+ - Stop depending on Active::Support (#present?)
4
+
1
5
  ## 3.0.3 (2023-04-27)
2
6
 
3
7
  - Add support for extracting text from tables in DOCX files.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- simple_text_extract (3.0.5)
4
+ simple_text_extract (3.0.7)
5
5
  roo (~> 2.10.0)
6
6
  rubyzip (~> 2.3.2)
7
7
  spreadsheet (~> 1.3.0)
@@ -10,54 +10,56 @@ GEM
10
10
  remote: https://rubygems.org/
11
11
  specs:
12
12
  ast (2.4.2)
13
- base64 (0.1.1)
13
+ bigdecimal (3.1.8)
14
14
  coderay (1.1.3)
15
- json (2.6.3)
15
+ json (2.7.2)
16
16
  language_server-protocol (3.17.0.3)
17
- memory_profiler (1.0.1)
18
- method_source (1.0.0)
19
- minitest (5.20.0)
20
- mocha (2.1.0)
17
+ memory_profiler (1.0.2)
18
+ method_source (1.1.0)
19
+ minitest (5.24.1)
20
+ mocha (2.4.0)
21
21
  ruby2_keywords (>= 0.0.5)
22
- nokogiri (1.15.4-arm64-darwin)
22
+ nokogiri (1.16.6-arm64-darwin)
23
23
  racc (~> 1.4)
24
- nokogiri (1.15.4-x86_64-linux)
24
+ nokogiri (1.16.6-x86_64-linux)
25
25
  racc (~> 1.4)
26
- parallel (1.23.0)
27
- parser (3.2.2.4)
26
+ parallel (1.25.1)
27
+ parser (3.3.4.0)
28
28
  ast (~> 2.4.1)
29
29
  racc
30
30
  pry (0.14.2)
31
31
  coderay (~> 1.1)
32
32
  method_source (~> 1.0)
33
- racc (1.7.1)
33
+ racc (1.8.0)
34
34
  rainbow (3.1.1)
35
- rake (13.0.6)
36
- regexp_parser (2.8.2)
37
- rexml (3.2.6)
38
- roo (2.10.0)
35
+ rake (13.2.1)
36
+ regexp_parser (2.9.2)
37
+ rexml (3.3.1)
38
+ strscan
39
+ roo (2.10.1)
39
40
  nokogiri (~> 1)
40
41
  rubyzip (>= 1.3.0, < 3.0.0)
41
- rubocop (1.57.1)
42
- base64 (~> 0.1.1)
42
+ rubocop (1.65.0)
43
43
  json (~> 2.3)
44
44
  language_server-protocol (>= 3.17.0)
45
45
  parallel (~> 1.10)
46
- parser (>= 3.2.2.4)
46
+ parser (>= 3.3.0.2)
47
47
  rainbow (>= 2.2.2, < 4.0)
48
- regexp_parser (>= 1.8, < 3.0)
48
+ regexp_parser (>= 2.4, < 3.0)
49
49
  rexml (>= 3.2.5, < 4.0)
50
- rubocop-ast (>= 1.28.1, < 2.0)
50
+ rubocop-ast (>= 1.31.1, < 2.0)
51
51
  ruby-progressbar (~> 1.7)
52
52
  unicode-display_width (>= 2.4.0, < 3.0)
53
- rubocop-ast (1.29.0)
54
- parser (>= 3.2.1.0)
55
- ruby-ole (1.2.12.2)
53
+ rubocop-ast (1.31.3)
54
+ parser (>= 3.3.1.0)
55
+ ruby-ole (1.2.13.1)
56
56
  ruby-progressbar (1.13.0)
57
57
  ruby2_keywords (0.0.5)
58
58
  rubyzip (2.3.2)
59
- spreadsheet (1.3.0)
59
+ spreadsheet (1.3.1)
60
+ bigdecimal
60
61
  ruby-ole
62
+ strscan (3.1.0)
61
63
  unicode-display_width (2.5.0)
62
64
 
63
65
  PLATFORMS
@@ -75,4 +77,4 @@ DEPENDENCIES
75
77
  simple_text_extract!
76
78
 
77
79
  BUNDLED WITH
78
- 2.4.10
80
+ 2.5.11
@@ -78,7 +78,7 @@ class SimpleTextExtract::Extract # rubocop:disable Metrics/ClassLength
78
78
  def pdf_extract
79
79
  return nil if SimpleTextExtract.missing_dependency?("pdftotext")
80
80
 
81
- `pdftotext #{Shellwords.escape(file.path)} -`
81
+ `pdftotext #{Shellwords.escape(file.path)} - 2>/dev/null`
82
82
  end
83
83
 
84
84
  def xlsx_extract
@@ -93,7 +93,7 @@ class SimpleTextExtract::Extract # rubocop:disable Metrics/ClassLength
93
93
  text << "# Sheet Name: #{name}"
94
94
 
95
95
  spreadsheet.sheet(name)&.each_row_streaming do |row|
96
- text << row.filter(&:present?).join(" ")
96
+ text << row.map(&:to_s).join(" ")
97
97
  end
98
98
  end
99
99
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module SimpleTextExtract
4
- VERSION = "3.0.5"
4
+ VERSION = "3.0.7"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: simple_text_extract
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.5
4
+ version: 3.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nick Weiland
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-23 00:00:00.000000000 Z
11
+ date: 2024-07-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: roo
@@ -60,7 +60,7 @@ extensions: []
60
60
  extra_rdoc_files: []
61
61
  files:
62
62
  - "-"
63
- - ".github/workflows/build.yml"
63
+ - ".github/workflows/test.yml"
64
64
  - ".gitignore"
65
65
  - ".rubocop.yml"
66
66
  - ".ruby-version"
@@ -97,7 +97,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
97
97
  requirements:
98
98
  - antiword
99
99
  - pdftotext/poppler
100
- rubygems_version: 3.4.10
100
+ rubygems_version: 3.5.15
101
101
  signing_key:
102
102
  specification_version: 4
103
103
  summary: Extract text from various file types before resorting to an OCR solution.