plaintext 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 857600eeedad2b6b305655743d6dd4768faa5e5d
4
- data.tar.gz: 17c78e4c90da3f07f5637c92a0891feb54b25b5b
2
+ SHA256:
3
+ metadata.gz: '048b008deb0b8bb0ff130916081610a36a130db002171c96c1243f79055127c0'
4
+ data.tar.gz: 7972e2a4aab310d79211fb1ffe302973c37601aefcdcc69825b713f1e12cccc4
5
5
  SHA512:
6
- metadata.gz: 436883566c3f9e1598a3f482c9146195646aa4e5d123aaedfe15876bb2876e93dd4c5bc964c515ece9b5f4e93b25974745bc176bd1aafb51aa4c64084221c7d5
7
- data.tar.gz: 92969693830e20b3a1fb842bb9e60a5ed63a93f87903da8c7c88466df7a62e14dec0bb03f457ba7b28e3ce47bf82da3c9cc1a22fae4555a6cba6dce9ebc48405
6
+ metadata.gz: 815b42f1d693c14e724e4d71d9bae7aef677d3afbd2a3c6a1eee02405299735ec70ac555537ac8d6e862889237f6f98e06ab92159ebfaf0a8027eada00a18b57
7
+ data.tar.gz: 1a2e6b0dd527678b0612b608834a9c682cf14787b81bd097c0fb86cd47e8786e983ca08efdfad9184066f9ef3d91d3c908d1715f591b228deab87b7bcbab89a9
@@ -1,7 +1,7 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.2.3
3
+ - 2.6.4
4
4
  before_install:
5
5
  - sudo apt-get -qq update
6
6
  - sudo apt-get install -y catdoc unrtf poppler-utils tesseract-ocr
7
- - gem install bundler -v 1.10.6
7
+ - gem install bundler -v 2.0.1
data/CHANGELOG CHANGED
@@ -6,6 +6,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.3.2] - 2019-09-02
10
+ - Set minimum Nokogiri version to 1.10.4. See CVE-2019-5477.
11
+ - Fix encoding issues for PDFs.
12
+ - Bump development dependencies to bundler version 2 and
13
+ rake version 12
14
+ - Update travis file to use ruby 2.6.4 and bundler 2.0.1
15
+
9
16
  ## [0.3.1] - 2019-01-16
10
17
 
11
18
  ### Added
@@ -11,21 +11,23 @@ module Plaintext
11
11
  # Due to how popen works the command will be executed directly without
12
12
  # involving the shell if cmd is an array.
13
13
  require 'fileutils'
14
+
15
+ FILE_PLACEHOLDER = '__FILE__'.freeze
16
+ DEFAULT_STREAM_ENCODING = 'ASCII-8BIT'.freeze
17
+
14
18
  def shellout(cmd, options = {}, &block)
15
19
  mode = "r+"
16
20
  IO.popen(cmd, mode) do |io|
17
- io.set_encoding("ASCII-8BIT") if io.respond_to?(:set_encoding)
21
+ set_stream_encoding(io)
18
22
  io.close_write unless options[:write_stdin]
19
23
  block.call(io) if block_given?
20
24
  end
21
25
  end
22
26
 
23
- FILE_PLACEHOLDER = '__FILE__'.freeze
24
-
25
27
  def text(file, options = {})
26
28
  cmd = @command.dup
27
29
  cmd[cmd.index(FILE_PLACEHOLDER)] = Pathname(file).to_s
28
- shellout(cmd){ |io| read io, options[:max_size] }.to_s
30
+ shellout(cmd) { |io| read io, options[:max_size] }.to_s
29
31
  end
30
32
 
31
33
 
@@ -41,10 +43,32 @@ module Plaintext
41
43
  new.available?
42
44
  end
43
45
 
46
+ protected
47
+
48
+ def utf8_stream?
49
+ false
50
+ end
51
+
44
52
  private
45
53
 
54
+ def set_stream_encoding(io)
55
+ return unless io.respond_to?(:set_encoding)
56
+
57
+ if utf8_stream?
58
+ io.set_encoding('UTF-8'.freeze)
59
+ else
60
+ io.set_encoding(DEFAULT_STREAM_ENCODING)
61
+ end
62
+ end
63
+
46
64
  def read(io, max_size = nil)
47
- Plaintext::CodesetUtil.to_utf8 io.read(max_size), "ASCII-8BIT"
65
+ piece = io.read(max_size)
66
+
67
+ if utf8_stream?
68
+ piece
69
+ else
70
+ Plaintext::CodesetUtil.to_utf8 piece, DEFAULT_STREAM_ENCODING
71
+ end
48
72
  end
49
73
  end
50
74
  end
@@ -5,9 +5,16 @@ module Plaintext
5
5
  DEFAULT = [
6
6
  '/usr/bin/pdftotext', '-enc', 'UTF-8', '__FILE__', '-'
7
7
  ].freeze
8
+
8
9
  def initialize
9
10
  @content_type = 'application/pdf'
10
11
  @command = Plaintext::Configuration['pdftotext'] || DEFAULT
11
12
  end
13
+
14
+ protected
15
+
16
+ def utf8_stream?
17
+ true
18
+ end
12
19
  end
13
20
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Plaintext
4
- VERSION = "0.3.1"
4
+ VERSION = "0.3.2"
5
5
  end
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'plaintext/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
- spec.name = "plaintext"
7
+ spec.name = 'plaintext'
8
8
  spec.version = Plaintext::VERSION
9
9
  spec.authors = ['Jens Krämer', 'Planio GmbH', 'OpenProject GmbH']
10
10
  spec.email = ['info@openproject.com']
@@ -12,17 +12,18 @@ Gem::Specification.new do |spec|
12
12
  spec.summary = 'Extract plain text from most common office documents.'
13
13
  spec.description = "Extract text from common office files. Based on the file's content type a command line tool is selected to do the job."
14
14
  spec.homepage = 'https://github.com/planio-gmbh/plaintext'
15
-
15
+ spec.license = 'GPL-2.0'
16
+
16
17
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
17
- spec.bindir = "exe"
18
+ spec.bindir = 'exe'
18
19
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
- spec.require_paths = ["lib"]
20
+ spec.require_paths = ['lib']
20
21
 
21
- spec.add_dependency 'rubyzip', '~> 1.2.1'
22
- spec.add_dependency 'nokogiri', '~> 1.8'
23
22
  spec.add_dependency 'activesupport', '>2.2.1 '
23
+ spec.add_dependency 'nokogiri', '~> 1.10', '>= 1.10.4'
24
+ spec.add_dependency 'rubyzip', '~> 1.2.1'
24
25
 
25
- spec.add_development_dependency "bundler", "~> 1.10"
26
- spec.add_development_dependency "rake", "~> 10.0"
27
- spec.add_development_dependency "rspec"
26
+ spec.add_development_dependency 'bundler', '~> 2.0'
27
+ spec.add_development_dependency 'rake', '~> 12.0'
28
+ spec.add_development_dependency 'rspec'
28
29
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: plaintext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Krämer
@@ -10,78 +10,84 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2019-01-16 00:00:00.000000000 Z
13
+ date: 2019-09-03 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
- name: rubyzip
16
+ name: activesupport
17
17
  requirement: !ruby/object:Gem::Requirement
18
18
  requirements:
19
- - - "~>"
19
+ - - ">"
20
20
  - !ruby/object:Gem::Version
21
- version: 1.2.1
21
+ version: 2.2.1
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
25
25
  requirements:
26
- - - "~>"
26
+ - - ">"
27
27
  - !ruby/object:Gem::Version
28
- version: 1.2.1
28
+ version: 2.2.1
29
29
  - !ruby/object:Gem::Dependency
30
30
  name: nokogiri
31
31
  requirement: !ruby/object:Gem::Requirement
32
32
  requirements:
33
33
  - - "~>"
34
34
  - !ruby/object:Gem::Version
35
- version: '1.8'
35
+ version: '1.10'
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: 1.10.4
36
39
  type: :runtime
37
40
  prerelease: false
38
41
  version_requirements: !ruby/object:Gem::Requirement
39
42
  requirements:
40
43
  - - "~>"
41
44
  - !ruby/object:Gem::Version
42
- version: '1.8'
45
+ version: '1.10'
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: 1.10.4
43
49
  - !ruby/object:Gem::Dependency
44
- name: activesupport
50
+ name: rubyzip
45
51
  requirement: !ruby/object:Gem::Requirement
46
52
  requirements:
47
- - - ">"
53
+ - - "~>"
48
54
  - !ruby/object:Gem::Version
49
- version: 2.2.1
55
+ version: 1.2.1
50
56
  type: :runtime
51
57
  prerelease: false
52
58
  version_requirements: !ruby/object:Gem::Requirement
53
59
  requirements:
54
- - - ">"
60
+ - - "~>"
55
61
  - !ruby/object:Gem::Version
56
- version: 2.2.1
62
+ version: 1.2.1
57
63
  - !ruby/object:Gem::Dependency
58
64
  name: bundler
59
65
  requirement: !ruby/object:Gem::Requirement
60
66
  requirements:
61
67
  - - "~>"
62
68
  - !ruby/object:Gem::Version
63
- version: '1.10'
69
+ version: '2.0'
64
70
  type: :development
65
71
  prerelease: false
66
72
  version_requirements: !ruby/object:Gem::Requirement
67
73
  requirements:
68
74
  - - "~>"
69
75
  - !ruby/object:Gem::Version
70
- version: '1.10'
76
+ version: '2.0'
71
77
  - !ruby/object:Gem::Dependency
72
78
  name: rake
73
79
  requirement: !ruby/object:Gem::Requirement
74
80
  requirements:
75
81
  - - "~>"
76
82
  - !ruby/object:Gem::Version
77
- version: '10.0'
83
+ version: '12.0'
78
84
  type: :development
79
85
  prerelease: false
80
86
  version_requirements: !ruby/object:Gem::Requirement
81
87
  requirements:
82
88
  - - "~>"
83
89
  - !ruby/object:Gem::Version
84
- version: '10.0'
90
+ version: '12.0'
85
91
  - !ruby/object:Gem::Dependency
86
92
  name: rspec
87
93
  requirement: !ruby/object:Gem::Requirement
@@ -137,7 +143,8 @@ files:
137
143
  - plaintext.gemspec
138
144
  - plaintext.yml.example
139
145
  homepage: https://github.com/planio-gmbh/plaintext
140
- licenses: []
146
+ licenses:
147
+ - GPL-2.0
141
148
  metadata: {}
142
149
  post_install_message:
143
150
  rdoc_options: []
@@ -154,8 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
154
161
  - !ruby/object:Gem::Version
155
162
  version: '0'
156
163
  requirements: []
157
- rubyforge_project:
158
- rubygems_version: 2.4.5.5
164
+ rubygems_version: 3.0.1
159
165
  signing_key:
160
166
  specification_version: 4
161
167
  summary: Extract plain text from most common office documents.