plaintext 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.travis.yml +2 -2
- data/CHANGELOG +7 -0
- data/lib/plaintext/file_handler/external_command_handler.rb +29 -5
- data/lib/plaintext/file_handler/external_command_handler/pdf_handler.rb +7 -0
- data/lib/plaintext/version.rb +1 -1
- data/plaintext.gemspec +10 -9
- metadata +27 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: '048b008deb0b8bb0ff130916081610a36a130db002171c96c1243f79055127c0'
|
4
|
+
data.tar.gz: 7972e2a4aab310d79211fb1ffe302973c37601aefcdcc69825b713f1e12cccc4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 815b42f1d693c14e724e4d71d9bae7aef677d3afbd2a3c6a1eee02405299735ec70ac555537ac8d6e862889237f6f98e06ab92159ebfaf0a8027eada00a18b57
|
7
|
+
data.tar.gz: 1a2e6b0dd527678b0612b608834a9c682cf14787b81bd097c0fb86cd47e8786e983ca08efdfad9184066f9ef3d91d3c908d1715f591b228deab87b7bcbab89a9
|
data/.travis.yml
CHANGED
data/CHANGELOG
CHANGED
@@ -6,6 +6,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
6
6
|
|
7
7
|
## [Unreleased]
|
8
8
|
|
9
|
+
## [0.3.2] - 2019-09-02
|
10
|
+
- Set minimum Nokogiri version to 1.10.4. See CVE-2019-5477.
|
11
|
+
- Fix encoding issues for PDFs.
|
12
|
+
- Bump development dependencies to bundler version 2 and
|
13
|
+
rake version 12
|
14
|
+
- Update travis file to use ruby 2.6.4 and bundler 2.0.1
|
15
|
+
|
9
16
|
## [0.3.1] - 2019-01-16
|
10
17
|
|
11
18
|
### Added
|
@@ -11,21 +11,23 @@ module Plaintext
|
|
11
11
|
# Due to how popen works the command will be executed directly without
|
12
12
|
# involving the shell if cmd is an array.
|
13
13
|
require 'fileutils'
|
14
|
+
|
15
|
+
FILE_PLACEHOLDER = '__FILE__'.freeze
|
16
|
+
DEFAULT_STREAM_ENCODING = 'ASCII-8BIT'.freeze
|
17
|
+
|
14
18
|
def shellout(cmd, options = {}, &block)
|
15
19
|
mode = "r+"
|
16
20
|
IO.popen(cmd, mode) do |io|
|
17
|
-
|
21
|
+
set_stream_encoding(io)
|
18
22
|
io.close_write unless options[:write_stdin]
|
19
23
|
block.call(io) if block_given?
|
20
24
|
end
|
21
25
|
end
|
22
26
|
|
23
|
-
FILE_PLACEHOLDER = '__FILE__'.freeze
|
24
|
-
|
25
27
|
def text(file, options = {})
|
26
28
|
cmd = @command.dup
|
27
29
|
cmd[cmd.index(FILE_PLACEHOLDER)] = Pathname(file).to_s
|
28
|
-
shellout(cmd){ |io| read io, options[:max_size] }.to_s
|
30
|
+
shellout(cmd) { |io| read io, options[:max_size] }.to_s
|
29
31
|
end
|
30
32
|
|
31
33
|
|
@@ -41,10 +43,32 @@ module Plaintext
|
|
41
43
|
new.available?
|
42
44
|
end
|
43
45
|
|
46
|
+
protected
|
47
|
+
|
48
|
+
def utf8_stream?
|
49
|
+
false
|
50
|
+
end
|
51
|
+
|
44
52
|
private
|
45
53
|
|
54
|
+
def set_stream_encoding(io)
|
55
|
+
return unless io.respond_to?(:set_encoding)
|
56
|
+
|
57
|
+
if utf8_stream?
|
58
|
+
io.set_encoding('UTF-8'.freeze)
|
59
|
+
else
|
60
|
+
io.set_encoding(DEFAULT_STREAM_ENCODING)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
46
64
|
def read(io, max_size = nil)
|
47
|
-
|
65
|
+
piece = io.read(max_size)
|
66
|
+
|
67
|
+
if utf8_stream?
|
68
|
+
piece
|
69
|
+
else
|
70
|
+
Plaintext::CodesetUtil.to_utf8 piece, DEFAULT_STREAM_ENCODING
|
71
|
+
end
|
48
72
|
end
|
49
73
|
end
|
50
74
|
end
|
@@ -5,9 +5,16 @@ module Plaintext
|
|
5
5
|
DEFAULT = [
|
6
6
|
'/usr/bin/pdftotext', '-enc', 'UTF-8', '__FILE__', '-'
|
7
7
|
].freeze
|
8
|
+
|
8
9
|
def initialize
|
9
10
|
@content_type = 'application/pdf'
|
10
11
|
@command = Plaintext::Configuration['pdftotext'] || DEFAULT
|
11
12
|
end
|
13
|
+
|
14
|
+
protected
|
15
|
+
|
16
|
+
def utf8_stream?
|
17
|
+
true
|
18
|
+
end
|
12
19
|
end
|
13
20
|
end
|
data/lib/plaintext/version.rb
CHANGED
data/plaintext.gemspec
CHANGED
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
require 'plaintext/version'
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name =
|
7
|
+
spec.name = 'plaintext'
|
8
8
|
spec.version = Plaintext::VERSION
|
9
9
|
spec.authors = ['Jens Krämer', 'Planio GmbH', 'OpenProject GmbH']
|
10
10
|
spec.email = ['info@openproject.com']
|
@@ -12,17 +12,18 @@ Gem::Specification.new do |spec|
|
|
12
12
|
spec.summary = 'Extract plain text from most common office documents.'
|
13
13
|
spec.description = "Extract text from common office files. Based on the file's content type a command line tool is selected to do the job."
|
14
14
|
spec.homepage = 'https://github.com/planio-gmbh/plaintext'
|
15
|
-
|
15
|
+
spec.license = 'GPL-2.0'
|
16
|
+
|
16
17
|
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
17
|
-
spec.bindir =
|
18
|
+
spec.bindir = 'exe'
|
18
19
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
19
|
-
spec.require_paths = [
|
20
|
+
spec.require_paths = ['lib']
|
20
21
|
|
21
|
-
spec.add_dependency 'rubyzip', '~> 1.2.1'
|
22
|
-
spec.add_dependency 'nokogiri', '~> 1.8'
|
23
22
|
spec.add_dependency 'activesupport', '>2.2.1 '
|
23
|
+
spec.add_dependency 'nokogiri', '~> 1.10', '>= 1.10.4'
|
24
|
+
spec.add_dependency 'rubyzip', '~> 1.2.1'
|
24
25
|
|
25
|
-
spec.add_development_dependency
|
26
|
-
spec.add_development_dependency
|
27
|
-
spec.add_development_dependency
|
26
|
+
spec.add_development_dependency 'bundler', '~> 2.0'
|
27
|
+
spec.add_development_dependency 'rake', '~> 12.0'
|
28
|
+
spec.add_development_dependency 'rspec'
|
28
29
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: plaintext
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Krämer
|
@@ -10,78 +10,84 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: exe
|
12
12
|
cert_chain: []
|
13
|
-
date: 2019-
|
13
|
+
date: 2019-09-03 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
|
-
name:
|
16
|
+
name: activesupport
|
17
17
|
requirement: !ruby/object:Gem::Requirement
|
18
18
|
requirements:
|
19
|
-
- - "
|
19
|
+
- - ">"
|
20
20
|
- !ruby/object:Gem::Version
|
21
|
-
version:
|
21
|
+
version: 2.2.1
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
25
|
requirements:
|
26
|
-
- - "
|
26
|
+
- - ">"
|
27
27
|
- !ruby/object:Gem::Version
|
28
|
-
version:
|
28
|
+
version: 2.2.1
|
29
29
|
- !ruby/object:Gem::Dependency
|
30
30
|
name: nokogiri
|
31
31
|
requirement: !ruby/object:Gem::Requirement
|
32
32
|
requirements:
|
33
33
|
- - "~>"
|
34
34
|
- !ruby/object:Gem::Version
|
35
|
-
version: '1.
|
35
|
+
version: '1.10'
|
36
|
+
- - ">="
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: 1.10.4
|
36
39
|
type: :runtime
|
37
40
|
prerelease: false
|
38
41
|
version_requirements: !ruby/object:Gem::Requirement
|
39
42
|
requirements:
|
40
43
|
- - "~>"
|
41
44
|
- !ruby/object:Gem::Version
|
42
|
-
version: '1.
|
45
|
+
version: '1.10'
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: 1.10.4
|
43
49
|
- !ruby/object:Gem::Dependency
|
44
|
-
name:
|
50
|
+
name: rubyzip
|
45
51
|
requirement: !ruby/object:Gem::Requirement
|
46
52
|
requirements:
|
47
|
-
- - "
|
53
|
+
- - "~>"
|
48
54
|
- !ruby/object:Gem::Version
|
49
|
-
version:
|
55
|
+
version: 1.2.1
|
50
56
|
type: :runtime
|
51
57
|
prerelease: false
|
52
58
|
version_requirements: !ruby/object:Gem::Requirement
|
53
59
|
requirements:
|
54
|
-
- - "
|
60
|
+
- - "~>"
|
55
61
|
- !ruby/object:Gem::Version
|
56
|
-
version:
|
62
|
+
version: 1.2.1
|
57
63
|
- !ruby/object:Gem::Dependency
|
58
64
|
name: bundler
|
59
65
|
requirement: !ruby/object:Gem::Requirement
|
60
66
|
requirements:
|
61
67
|
- - "~>"
|
62
68
|
- !ruby/object:Gem::Version
|
63
|
-
version: '
|
69
|
+
version: '2.0'
|
64
70
|
type: :development
|
65
71
|
prerelease: false
|
66
72
|
version_requirements: !ruby/object:Gem::Requirement
|
67
73
|
requirements:
|
68
74
|
- - "~>"
|
69
75
|
- !ruby/object:Gem::Version
|
70
|
-
version: '
|
76
|
+
version: '2.0'
|
71
77
|
- !ruby/object:Gem::Dependency
|
72
78
|
name: rake
|
73
79
|
requirement: !ruby/object:Gem::Requirement
|
74
80
|
requirements:
|
75
81
|
- - "~>"
|
76
82
|
- !ruby/object:Gem::Version
|
77
|
-
version: '
|
83
|
+
version: '12.0'
|
78
84
|
type: :development
|
79
85
|
prerelease: false
|
80
86
|
version_requirements: !ruby/object:Gem::Requirement
|
81
87
|
requirements:
|
82
88
|
- - "~>"
|
83
89
|
- !ruby/object:Gem::Version
|
84
|
-
version: '
|
90
|
+
version: '12.0'
|
85
91
|
- !ruby/object:Gem::Dependency
|
86
92
|
name: rspec
|
87
93
|
requirement: !ruby/object:Gem::Requirement
|
@@ -137,7 +143,8 @@ files:
|
|
137
143
|
- plaintext.gemspec
|
138
144
|
- plaintext.yml.example
|
139
145
|
homepage: https://github.com/planio-gmbh/plaintext
|
140
|
-
licenses:
|
146
|
+
licenses:
|
147
|
+
- GPL-2.0
|
141
148
|
metadata: {}
|
142
149
|
post_install_message:
|
143
150
|
rdoc_options: []
|
@@ -154,8 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
154
161
|
- !ruby/object:Gem::Version
|
155
162
|
version: '0'
|
156
163
|
requirements: []
|
157
|
-
|
158
|
-
rubygems_version: 2.4.5.5
|
164
|
+
rubygems_version: 3.0.1
|
159
165
|
signing_key:
|
160
166
|
specification_version: 4
|
161
167
|
summary: Extract plain text from most common office documents.
|