simple_text_extract 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -0
- data/lib/simple_text_extract.rb +1 -1
- data/lib/simple_text_extract/format_extractor_factory.rb +1 -1
- data/lib/simple_text_extract/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 67a6daab9ba3d33ea757384fda1407875c1451cb2be0bb636ffea9b32384c12d
|
4
|
+
data.tar.gz: e5077817daf69f20d5ad54ae82b55465cf3727f5acb20df7382cc54403ca3e43
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7ef181da803d55ba917a5051402a3ac8527deb8886c68c417b4eabc677523a87fd011840944018c3c485a48c8dd2098b60960fc639b716481310c3ccc30f87a3
|
7
|
+
data.tar.gz: f73297a615714bbf29b48b87f0437f10c565078dd97d7ab0a07171f422233e5170f54e97eb19ea81f1e85b4777d52482807cc6efcc8c1737d31e52c41c59d778
|
data/README.md
CHANGED
@@ -9,6 +9,7 @@ SimpleTextExtract handles parsing text from:
|
|
9
9
|
- `.doc`
|
10
10
|
- `.xlsx`
|
11
11
|
- `.xls`
|
12
|
+
- `.csv`
|
12
13
|
- `.txt` 😜
|
13
14
|
|
14
15
|
If no text is parsed (for `pdf`), or a file format is not supported (like images), then `nil` is returned and you can move on to the heavy-duty tools like [Henkei](https://github.com/abrom/henkei) 💪.
|
data/lib/simple_text_extract.rb
CHANGED
@@ -5,7 +5,7 @@ require "simple_text_extract/text_extractor"
|
|
5
5
|
require "simple_text_extract/format_extractor_factory"
|
6
6
|
|
7
7
|
module SimpleTextExtract
|
8
|
-
SUPPORTED_FILETYPES = ["xls", "xlsx", "doc", "docx", "txt", "pdf"].freeze
|
8
|
+
SUPPORTED_FILETYPES = ["xls", "xlsx", "doc", "docx", "txt", "pdf", "csv"].freeze
|
9
9
|
|
10
10
|
class Error < StandardError; end
|
11
11
|
|
@@ -12,7 +12,7 @@ module SimpleTextExtract
|
|
12
12
|
class FormatExtractorFactory
|
13
13
|
def self.call(file) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
|
14
14
|
case file.path
|
15
|
-
when /.txt$/i
|
15
|
+
when /(.txt$|.csv$)/i
|
16
16
|
FormatExtractor::PlainText.new(file)
|
17
17
|
when /.pdf$/i
|
18
18
|
FormatExtractor::PDF.new(file)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: simple_text_extract
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.1
|
4
|
+
version: 1.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nick Weiland
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-07-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: roo
|
@@ -133,7 +133,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
133
133
|
requirements:
|
134
134
|
- antiword
|
135
135
|
- pdftotext/poppler
|
136
|
-
rubygems_version: 3.0.
|
136
|
+
rubygems_version: 3.0.3
|
137
137
|
signing_key:
|
138
138
|
specification_version: 4
|
139
139
|
summary: Attempts to quickly extract text from various file types before resorting
|