simple_text_extract 3.0.6 → 3.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/test.yml +9 -3
- data/.rubocop.yml +3 -0
- data/.ruby-version +1 -1
- data/Gemfile.lock +2 -2
- data/lib/simple_text_extract/extract.rb +4 -3
- data/lib/simple_text_extract/version.rb +1 -1
- data/simple_text_extract.gemspec +1 -1
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 44412a154f56b1100983582f15f3d56800edf040dfac98a5ae9fb9b3fa2d6401
|
4
|
+
data.tar.gz: a6c5b2d94e13be12c71d3b4a66d0c2aef8fc6b7d7084a4c2b2d490e49954916d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 88e630c8aeee64240196e96c93675ba15a1a7c2f3c2c1a67ab51a05d35dc404a11574f61995628b2b399cd4d42570d40beddb55c0b6cda0bccece97415d995a7
|
7
|
+
data.tar.gz: 42ff8b4a4702c52702ac95b2bab6b79495384cdec6d9f68011fb04bfca778cf91442c010ab67ac0a1ee59d27148b00be43221fbd20392d6e96f53d4ec5ae7788
|
data/.github/workflows/test.yml
CHANGED
@@ -1,14 +1,20 @@
|
|
1
1
|
name: build
|
2
|
-
on:
|
2
|
+
on:
|
3
|
+
push:
|
4
|
+
branches:
|
5
|
+
- master
|
6
|
+
pull_request:
|
7
|
+
branches:
|
8
|
+
- master
|
3
9
|
jobs:
|
4
10
|
build:
|
5
11
|
strategy:
|
6
12
|
fail-fast: false
|
7
13
|
matrix:
|
8
14
|
include:
|
9
|
-
- ruby: 3.
|
15
|
+
- ruby: 3.3
|
10
16
|
gemfile: Gemfile
|
11
|
-
- ruby: 3.
|
17
|
+
- ruby: 3.2
|
12
18
|
gemfile: Gemfile
|
13
19
|
runs-on: ubuntu-latest
|
14
20
|
env:
|
data/.rubocop.yml
CHANGED
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
3.
|
1
|
+
3.3.4
|
data/Gemfile.lock
CHANGED
@@ -78,7 +78,7 @@ class SimpleTextExtract::Extract # rubocop:disable Metrics/ClassLength
|
|
78
78
|
def pdf_extract
|
79
79
|
return nil if SimpleTextExtract.missing_dependency?("pdftotext")
|
80
80
|
|
81
|
-
`pdftotext #{Shellwords.escape(file.path)}
|
81
|
+
`pdftotext #{Shellwords.escape(file.path)} - 2>/dev/null`
|
82
82
|
end
|
83
83
|
|
84
84
|
def xlsx_extract
|
@@ -87,13 +87,14 @@ class SimpleTextExtract::Extract # rubocop:disable Metrics/ClassLength
|
|
87
87
|
spreadsheet = Roo::Spreadsheet.open(file, only_visible_sheets: true)
|
88
88
|
|
89
89
|
text = []
|
90
|
-
|
91
90
|
spreadsheet.sheets.each_with_index do |name, i|
|
92
91
|
text << "# Sheet Index: #{i}"
|
93
92
|
text << "# Sheet Name: #{name}"
|
94
93
|
|
95
94
|
spreadsheet.sheet(name)&.each_row_streaming do |row|
|
96
|
-
text << row.map
|
95
|
+
text << row.map do |cell|
|
96
|
+
cell.value.to_s
|
97
|
+
end.join(" ")
|
97
98
|
end
|
98
99
|
end
|
99
100
|
|
data/simple_text_extract.gemspec
CHANGED
@@ -27,6 +27,6 @@ Gem::Specification.new do |spec|
|
|
27
27
|
spec.requirements << "pdftotext/poppler"
|
28
28
|
|
29
29
|
spec.add_dependency "roo", "~> 2.10.0"
|
30
|
-
spec.add_dependency "spreadsheet", "~> 1.3.0"
|
31
30
|
spec.add_dependency "rubyzip", "~> 2.3.2"
|
31
|
+
spec.add_dependency "spreadsheet", "~> 1.3.0"
|
32
32
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_text_extract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.0.
|
4
|
+
version: 3.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nick Weiland
|
@@ -25,33 +25,33 @@ dependencies:
|
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: 2.10.0
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
28
|
+
name: rubyzip
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 2.3.2
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 2.3.2
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: spreadsheet
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: 1.3.0
|
48
48
|
type: :runtime
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
54
|
+
version: 1.3.0
|
55
55
|
description: Extract text from various file types before resorting to an OCR solution.
|
56
56
|
email:
|
57
57
|
- nickweiland@gmail.com
|
@@ -97,7 +97,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
97
97
|
requirements:
|
98
98
|
- antiword
|
99
99
|
- pdftotext/poppler
|
100
|
-
rubygems_version: 3.5.
|
100
|
+
rubygems_version: 3.5.15
|
101
101
|
signing_key:
|
102
102
|
specification_version: 4
|
103
103
|
summary: Extract text from various file types before resorting to an OCR solution.
|