simple_text_extract 3.0.6 → 3.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +9 -3
- data/.rubocop.yml +3 -0
- data/.ruby-version +1 -1
- data/Gemfile.lock +2 -2
- data/lib/simple_text_extract/extract.rb +4 -3
- data/lib/simple_text_extract/version.rb +1 -1
- data/simple_text_extract.gemspec +1 -1
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 44412a154f56b1100983582f15f3d56800edf040dfac98a5ae9fb9b3fa2d6401
|
4
|
+
data.tar.gz: a6c5b2d94e13be12c71d3b4a66d0c2aef8fc6b7d7084a4c2b2d490e49954916d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 88e630c8aeee64240196e96c93675ba15a1a7c2f3c2c1a67ab51a05d35dc404a11574f61995628b2b399cd4d42570d40beddb55c0b6cda0bccece97415d995a7
|
7
|
+
data.tar.gz: 42ff8b4a4702c52702ac95b2bab6b79495384cdec6d9f68011fb04bfca778cf91442c010ab67ac0a1ee59d27148b00be43221fbd20392d6e96f53d4ec5ae7788
|
data/.github/workflows/test.yml
CHANGED
@@ -1,14 +1,20 @@
|
|
1
1
|
name: build
|
2
|
-
on:
|
2
|
+
on:
|
3
|
+
push:
|
4
|
+
branches:
|
5
|
+
- master
|
6
|
+
pull_request:
|
7
|
+
branches:
|
8
|
+
- master
|
3
9
|
jobs:
|
4
10
|
build:
|
5
11
|
strategy:
|
6
12
|
fail-fast: false
|
7
13
|
matrix:
|
8
14
|
include:
|
9
|
-
- ruby: 3.
|
15
|
+
- ruby: 3.3
|
10
16
|
gemfile: Gemfile
|
11
|
-
- ruby: 3.
|
17
|
+
- ruby: 3.2
|
12
18
|
gemfile: Gemfile
|
13
19
|
runs-on: ubuntu-latest
|
14
20
|
env:
|
data/.rubocop.yml
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.
|
1
|
+
3.3.4
|
data/Gemfile.lock
CHANGED
data/lib/simple_text_extract/extract.rb
CHANGED
@@ -78,7 +78,7 @@ class SimpleTextExtract::Extract # rubocop:disable Metrics/ClassLength
|
|
78
78
|
def pdf_extract
|
79
79
|
return nil if SimpleTextExtract.missing_dependency?("pdftotext")
|
80
80
|
|
81
|
-
`pdftotext #{Shellwords.escape(file.path)}
|
81
|
+
`pdftotext #{Shellwords.escape(file.path)} - 2>/dev/null`
|
82
82
|
end
|
83
83
|
|
84
84
|
def xlsx_extract
|
@@ -87,13 +87,14 @@ class SimpleTextExtract::Extract # rubocop:disable Metrics/ClassLength
|
|
87
87
|
spreadsheet = Roo::Spreadsheet.open(file, only_visible_sheets: true)
|
88
88
|
|
89
89
|
text = []
|
90
|
-
|
91
90
|
spreadsheet.sheets.each_with_index do |name, i|
|
92
91
|
text << "# Sheet Index: #{i}"
|
93
92
|
text << "# Sheet Name: #{name}"
|
94
93
|
|
95
94
|
spreadsheet.sheet(name)&.each_row_streaming do |row|
|
96
|
-
text << row.map
|
95
|
+
text << row.map do |cell|
|
96
|
+
cell.value.to_s
|
97
|
+
end.join(" ")
|
97
98
|
end
|
98
99
|
end
|
99
100
|
|
data/simple_text_extract.gemspec
CHANGED
@@ -27,6 +27,6 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.requirements << "pdftotext/poppler"
|
28
28
|
|
29
29
|
spec.add_dependency "roo", "~> 2.10.0"
|
30
|
-
spec.add_dependency "spreadsheet", "~> 1.3.0"
|
31
30
|
spec.add_dependency "rubyzip", "~> 2.3.2"
|
31
|
+
spec.add_dependency "spreadsheet", "~> 1.3.0"
|
32
32
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_text_extract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.6
|
4
|
+
version: 3.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nick Weiland
|
@@ -25,33 +25,33 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 2.10.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name: spreadsheet
|
28
|
+
name: rubyzip
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 1.3.0
|
33
|
+
version: 2.3.2
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 1.3.0
|
40
|
+
version: 2.3.2
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name: rubyzip
|
42
|
+
name: spreadsheet
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: 2.3.2
|
47
|
+
version: 1.3.0
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: 2.3.2
|
54
|
+
version: 1.3.0
|
55
55
|
description: Extract text from various file types before resorting to an OCR solution.
|
56
56
|
email:
|
57
57
|
- nickweiland@gmail.com
|
@@ -97,7 +97,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
97
97
|
requirements:
|
98
98
|
- antiword
|
99
99
|
- pdftotext/poppler
|
100
|
-
rubygems_version: 3.5.
|
100
|
+
rubygems_version: 3.5.15
|
101
101
|
signing_key:
|
102
102
|
specification_version: 4
|
103
103
|
summary: Extract text from various file types before resorting to an OCR solution.
|