plaintext 0.3.1 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.travis.yml +2 -2
- data/CHANGELOG +7 -0
- data/lib/plaintext/file_handler/external_command_handler.rb +29 -5
- data/lib/plaintext/file_handler/external_command_handler/pdf_handler.rb +7 -0
- data/lib/plaintext/version.rb +1 -1
- data/plaintext.gemspec +10 -9
- metadata +27 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: '048b008deb0b8bb0ff130916081610a36a130db002171c96c1243f79055127c0'
|
4
|
+
data.tar.gz: 7972e2a4aab310d79211fb1ffe302973c37601aefcdcc69825b713f1e12cccc4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 815b42f1d693c14e724e4d71d9bae7aef677d3afbd2a3c6a1eee02405299735ec70ac555537ac8d6e862889237f6f98e06ab92159ebfaf0a8027eada00a18b57
|
7
|
+
data.tar.gz: 1a2e6b0dd527678b0612b608834a9c682cf14787b81bd097c0fb86cd47e8786e983ca08efdfad9184066f9ef3d91d3c908d1715f591b228deab87b7bcbab89a9
|
data/.travis.yml
CHANGED
data/CHANGELOG
CHANGED
@@ -6,6 +6,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
6
6
|
|
7
7
|
## [Unreleased]
|
8
8
|
|
9
|
+
## [0.3.2] - 2019-09-02
|
10
|
+
- Set minimum Nokogiri version to 1.10.4. See CVE-2019-5477.
|
11
|
+
- Fix encoding issues for PDFs.
|
12
|
+
- Bump development dependencies to bundler version 2 and
|
13
|
+
rake version 12
|
14
|
+
- Update travis file to use ruby 2.6.4 and bundler 2.0.1
|
15
|
+
|
9
16
|
## [0.3.1] - 2019-01-16
|
10
17
|
|
11
18
|
### Added
|
@@ -11,21 +11,23 @@ module Plaintext
|
|
11
11
|
# Due to how popen works the command will be executed directly without
|
12
12
|
# involving the shell if cmd is an array.
|
13
13
|
require 'fileutils'
|
14
|
+
|
15
|
+
FILE_PLACEHOLDER = '__FILE__'.freeze
|
16
|
+
DEFAULT_STREAM_ENCODING = 'ASCII-8BIT'.freeze
|
17
|
+
|
14
18
|
def shellout(cmd, options = {}, &block)
|
15
19
|
mode = "r+"
|
16
20
|
IO.popen(cmd, mode) do |io|
|
17
|
-
|
21
|
+
set_stream_encoding(io)
|
18
22
|
io.close_write unless options[:write_stdin]
|
19
23
|
block.call(io) if block_given?
|
20
24
|
end
|
21
25
|
end
|
22
26
|
|
23
|
-
FILE_PLACEHOLDER = '__FILE__'.freeze
|
24
|
-
|
25
27
|
def text(file, options = {})
|
26
28
|
cmd = @command.dup
|
27
29
|
cmd[cmd.index(FILE_PLACEHOLDER)] = Pathname(file).to_s
|
28
|
-
shellout(cmd){ |io| read io, options[:max_size] }.to_s
|
30
|
+
shellout(cmd) { |io| read io, options[:max_size] }.to_s
|
29
31
|
end
|
30
32
|
|
31
33
|
|
@@ -41,10 +43,32 @@ module Plaintext
|
|
41
43
|
new.available?
|
42
44
|
end
|
43
45
|
|
46
|
+
protected
|
47
|
+
|
48
|
+
def utf8_stream?
|
49
|
+
false
|
50
|
+
end
|
51
|
+
|
44
52
|
private
|
45
53
|
|
54
|
+
def set_stream_encoding(io)
|
55
|
+
return unless io.respond_to?(:set_encoding)
|
56
|
+
|
57
|
+
if utf8_stream?
|
58
|
+
io.set_encoding('UTF-8'.freeze)
|
59
|
+
else
|
60
|
+
io.set_encoding(DEFAULT_STREAM_ENCODING)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
46
64
|
def read(io, max_size = nil)
|
47
|
-
|
65
|
+
piece = io.read(max_size)
|
66
|
+
|
67
|
+
if utf8_stream?
|
68
|
+
piece
|
69
|
+
else
|
70
|
+
Plaintext::CodesetUtil.to_utf8 piece, DEFAULT_STREAM_ENCODING
|
71
|
+
end
|
48
72
|
end
|
49
73
|
end
|
50
74
|
end
|
@@ -5,9 +5,16 @@ module Plaintext
|
|
5
5
|
DEFAULT = [
|
6
6
|
'/usr/bin/pdftotext', '-enc', 'UTF-8', '__FILE__', '-'
|
7
7
|
].freeze
|
8
|
+
|
8
9
|
def initialize
|
9
10
|
@content_type = 'application/pdf'
|
10
11
|
@command = Plaintext::Configuration['pdftotext'] || DEFAULT
|
11
12
|
end
|
13
|
+
|
14
|
+
protected
|
15
|
+
|
16
|
+
def utf8_stream?
|
17
|
+
true
|
18
|
+
end
|
12
19
|
end
|
13
20
|
end
|
data/lib/plaintext/version.rb
CHANGED
data/plaintext.gemspec
CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
require 'plaintext/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
7
|
+
spec.name = 'plaintext'
|
8
8
|
spec.version = Plaintext::VERSION
|
9
9
|
spec.authors = ['Jens Krämer', 'Planio GmbH', 'OpenProject GmbH']
|
10
10
|
spec.email = ['info@openproject.com']
|
@@ -12,17 +12,18 @@ Gem::Specification.new do |spec|
|
|
12
12
|
spec.summary = 'Extract plain text from most common office documents.'
|
13
13
|
spec.description = "Extract text from common office files. Based on the file's content type a command line tool is selected to do the job."
|
14
14
|
spec.homepage = 'https://github.com/planio-gmbh/plaintext'
|
15
|
-
|
15
|
+
spec.license = 'GPL-2.0'
|
16
|
+
|
16
17
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
17
|
-
spec.bindir =
|
18
|
+
spec.bindir = 'exe'
|
18
19
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
19
|
-
spec.require_paths = [
|
20
|
+
spec.require_paths = ['lib']
|
20
21
|
|
21
|
-
spec.add_dependency 'rubyzip', '~> 1.2.1'
|
22
|
-
spec.add_dependency 'nokogiri', '~> 1.8'
|
23
22
|
spec.add_dependency 'activesupport', '>2.2.1 '
|
23
|
+
spec.add_dependency 'nokogiri', '~> 1.10', '>= 1.10.4'
|
24
|
+
spec.add_dependency 'rubyzip', '~> 1.2.1'
|
24
25
|
|
25
|
-
spec.add_development_dependency
|
26
|
-
spec.add_development_dependency
|
27
|
-
spec.add_development_dependency
|
26
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
27
|
+
spec.add_development_dependency 'rake', '~> 12.0'
|
28
|
+
spec.add_development_dependency 'rspec'
|
28
29
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: plaintext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Krämer
|
@@ -10,78 +10,84 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2019-
|
13
|
+
date: 2019-09-03 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
|
-
name: rubyzip
|
16
|
+
name: activesupport
|
17
17
|
requirement: !ruby/object:Gem::Requirement
|
18
18
|
requirements:
|
19
|
-
- - "~>"
|
19
|
+
- - ">"
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version: 1.2.1
|
21
|
+
version: 2.2.1
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
25
|
requirements:
|
26
|
-
- - "~>"
|
26
|
+
- - ">"
|
27
27
|
- !ruby/object:Gem::Version
|
28
|
-
version: 1.2.1
|
28
|
+
version: 2.2.1
|
29
29
|
- !ruby/object:Gem::Dependency
|
30
30
|
name: nokogiri
|
31
31
|
requirement: !ruby/object:Gem::Requirement
|
32
32
|
requirements:
|
33
33
|
- - "~>"
|
34
34
|
- !ruby/object:Gem::Version
|
35
|
-
version: '1.8'
|
35
|
+
version: '1.10'
|
36
|
+
- - ">="
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: 1.10.4
|
36
39
|
type: :runtime
|
37
40
|
prerelease: false
|
38
41
|
version_requirements: !ruby/object:Gem::Requirement
|
39
42
|
requirements:
|
40
43
|
- - "~>"
|
41
44
|
- !ruby/object:Gem::Version
|
42
|
-
version: '1.8'
|
45
|
+
version: '1.10'
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: 1.10.4
|
43
49
|
- !ruby/object:Gem::Dependency
|
44
|
-
name: activesupport
|
50
|
+
name: rubyzip
|
45
51
|
requirement: !ruby/object:Gem::Requirement
|
46
52
|
requirements:
|
47
|
-
- - ">"
|
53
|
+
- - "~>"
|
48
54
|
- !ruby/object:Gem::Version
|
49
|
-
version: 2.2.1
|
55
|
+
version: 1.2.1
|
50
56
|
type: :runtime
|
51
57
|
prerelease: false
|
52
58
|
version_requirements: !ruby/object:Gem::Requirement
|
53
59
|
requirements:
|
54
|
-
- - ">"
|
60
|
+
- - "~>"
|
55
61
|
- !ruby/object:Gem::Version
|
56
|
-
version: 2.2.1
|
62
|
+
version: 1.2.1
|
57
63
|
- !ruby/object:Gem::Dependency
|
58
64
|
name: bundler
|
59
65
|
requirement: !ruby/object:Gem::Requirement
|
60
66
|
requirements:
|
61
67
|
- - "~>"
|
62
68
|
- !ruby/object:Gem::Version
|
63
|
-
version: '
|
69
|
+
version: '2.0'
|
64
70
|
type: :development
|
65
71
|
prerelease: false
|
66
72
|
version_requirements: !ruby/object:Gem::Requirement
|
67
73
|
requirements:
|
68
74
|
- - "~>"
|
69
75
|
- !ruby/object:Gem::Version
|
70
|
-
version: '
|
76
|
+
version: '2.0'
|
71
77
|
- !ruby/object:Gem::Dependency
|
72
78
|
name: rake
|
73
79
|
requirement: !ruby/object:Gem::Requirement
|
74
80
|
requirements:
|
75
81
|
- - "~>"
|
76
82
|
- !ruby/object:Gem::Version
|
77
|
-
version: '
|
83
|
+
version: '12.0'
|
78
84
|
type: :development
|
79
85
|
prerelease: false
|
80
86
|
version_requirements: !ruby/object:Gem::Requirement
|
81
87
|
requirements:
|
82
88
|
- - "~>"
|
83
89
|
- !ruby/object:Gem::Version
|
84
|
-
version: '
|
90
|
+
version: '12.0'
|
85
91
|
- !ruby/object:Gem::Dependency
|
86
92
|
name: rspec
|
87
93
|
requirement: !ruby/object:Gem::Requirement
|
@@ -137,7 +143,8 @@ files:
|
|
137
143
|
- plaintext.gemspec
|
138
144
|
- plaintext.yml.example
|
139
145
|
homepage: https://github.com/planio-gmbh/plaintext
|
140
|
-
licenses:
|
146
|
+
licenses:
|
147
|
+
- GPL-2.0
|
141
148
|
metadata: {}
|
142
149
|
post_install_message:
|
143
150
|
rdoc_options: []
|
@@ -154,8 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
154
161
|
- !ruby/object:Gem::Version
|
155
162
|
version: '0'
|
156
163
|
requirements: []
|
157
|
-
|
158
|
-
rubygems_version: 2.4.5.5
|
164
|
+
rubygems_version: 3.0.1
|
159
165
|
signing_key:
|
160
166
|
specification_version: 4
|
161
167
|
summary: Extract plain text from most common office documents.
|