plaintext 0.3.1 → 0.3.2

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 857600eeedad2b6b305655743d6dd4768faa5e5d
4
- data.tar.gz: 17c78e4c90da3f07f5637c92a0891feb54b25b5b
2
+ SHA256:
3
+ metadata.gz: '048b008deb0b8bb0ff130916081610a36a130db002171c96c1243f79055127c0'
4
+ data.tar.gz: 7972e2a4aab310d79211fb1ffe302973c37601aefcdcc69825b713f1e12cccc4
5
5
  SHA512:
6
- metadata.gz: 436883566c3f9e1598a3f482c9146195646aa4e5d123aaedfe15876bb2876e93dd4c5bc964c515ece9b5f4e93b25974745bc176bd1aafb51aa4c64084221c7d5
7
- data.tar.gz: 92969693830e20b3a1fb842bb9e60a5ed63a93f87903da8c7c88466df7a62e14dec0bb03f457ba7b28e3ce47bf82da3c9cc1a22fae4555a6cba6dce9ebc48405
6
+ metadata.gz: 815b42f1d693c14e724e4d71d9bae7aef677d3afbd2a3c6a1eee02405299735ec70ac555537ac8d6e862889237f6f98e06ab92159ebfaf0a8027eada00a18b57
7
+ data.tar.gz: 1a2e6b0dd527678b0612b608834a9c682cf14787b81bd097c0fb86cd47e8786e983ca08efdfad9184066f9ef3d91d3c908d1715f591b228deab87b7bcbab89a9
@@ -1,7 +1,7 @@
1
1
  language: ruby
2
2
  rvm:
3
- - 2.2.3
3
+ - 2.6.4
4
4
  before_install:
5
5
  - sudo apt-get -qq update
6
6
  - sudo apt-get install -y catdoc unrtf poppler-utils tesseract-ocr
7
- - gem install bundler -v 1.10.6
7
+ - gem install bundler -v 2.0.1
data/CHANGELOG CHANGED
@@ -6,6 +6,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.3.2] - 2019-09-02
10
+ - Set minimum Nokogiri version to 1.10.4. See CVE-2019-5477.
11
+ - Fix encoding issues for PDFs.
12
+ - Bump development dependencies to bundler version 2 and
13
+ rake version 12
14
+ - Update travis file to use ruby 2.6.4 and bundler 2.0.1
15
+
9
16
  ## [0.3.1] - 2019-01-16
10
17
 
11
18
  ### Added
@@ -11,21 +11,23 @@ module Plaintext
11
11
  # Due to how popen works the command will be executed directly without
12
12
  # involving the shell if cmd is an array.
13
13
  require 'fileutils'
14
+
15
+ FILE_PLACEHOLDER = '__FILE__'.freeze
16
+ DEFAULT_STREAM_ENCODING = 'ASCII-8BIT'.freeze
17
+
14
18
  def shellout(cmd, options = {}, &block)
15
19
  mode = "r+"
16
20
  IO.popen(cmd, mode) do |io|
17
- io.set_encoding("ASCII-8BIT") if io.respond_to?(:set_encoding)
21
+ set_stream_encoding(io)
18
22
  io.close_write unless options[:write_stdin]
19
23
  block.call(io) if block_given?
20
24
  end
21
25
  end
22
26
 
23
- FILE_PLACEHOLDER = '__FILE__'.freeze
24
-
25
27
  def text(file, options = {})
26
28
  cmd = @command.dup
27
29
  cmd[cmd.index(FILE_PLACEHOLDER)] = Pathname(file).to_s
28
- shellout(cmd){ |io| read io, options[:max_size] }.to_s
30
+ shellout(cmd) { |io| read io, options[:max_size] }.to_s
29
31
  end
30
32
 
31
33
 
@@ -41,10 +43,32 @@ module Plaintext
41
43
  new.available?
42
44
  end
43
45
 
46
+ protected
47
+
48
+ def utf8_stream?
49
+ false
50
+ end
51
+
44
52
  private
45
53
 
54
+ def set_stream_encoding(io)
55
+ return unless io.respond_to?(:set_encoding)
56
+
57
+ if utf8_stream?
58
+ io.set_encoding('UTF-8'.freeze)
59
+ else
60
+ io.set_encoding(DEFAULT_STREAM_ENCODING)
61
+ end
62
+ end
63
+
46
64
  def read(io, max_size = nil)
47
- Plaintext::CodesetUtil.to_utf8 io.read(max_size), "ASCII-8BIT"
65
+ piece = io.read(max_size)
66
+
67
+ if utf8_stream?
68
+ piece
69
+ else
70
+ Plaintext::CodesetUtil.to_utf8 piece, DEFAULT_STREAM_ENCODING
71
+ end
48
72
  end
49
73
  end
50
74
  end
@@ -5,9 +5,16 @@ module Plaintext
5
5
  DEFAULT = [
6
6
  '/usr/bin/pdftotext', '-enc', 'UTF-8', '__FILE__', '-'
7
7
  ].freeze
8
+
8
9
  def initialize
9
10
  @content_type = 'application/pdf'
10
11
  @command = Plaintext::Configuration['pdftotext'] || DEFAULT
11
12
  end
13
+
14
+ protected
15
+
16
+ def utf8_stream?
17
+ true
18
+ end
12
19
  end
13
20
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Plaintext
4
- VERSION = "0.3.1"
4
+ VERSION = "0.3.2"
5
5
  end
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'plaintext/version'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
- spec.name = "plaintext"
7
+ spec.name = 'plaintext'
8
8
  spec.version = Plaintext::VERSION
9
9
  spec.authors = ['Jens Krämer', 'Planio GmbH', 'OpenProject GmbH']
10
10
  spec.email = ['info@openproject.com']
@@ -12,17 +12,18 @@ Gem::Specification.new do |spec|
12
12
  spec.summary = 'Extract plain text from most common office documents.'
13
13
  spec.description = "Extract text from common office files. Based on the file's content type a command line tool is selected to do the job."
14
14
  spec.homepage = 'https://github.com/planio-gmbh/plaintext'
15
-
15
+ spec.license = 'GPL-2.0'
16
+
16
17
  spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
17
- spec.bindir = "exe"
18
+ spec.bindir = 'exe'
18
19
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
19
- spec.require_paths = ["lib"]
20
+ spec.require_paths = ['lib']
20
21
 
21
- spec.add_dependency 'rubyzip', '~> 1.2.1'
22
- spec.add_dependency 'nokogiri', '~> 1.8'
23
22
  spec.add_dependency 'activesupport', '>2.2.1 '
23
+ spec.add_dependency 'nokogiri', '~> 1.10', '>= 1.10.4'
24
+ spec.add_dependency 'rubyzip', '~> 1.2.1'
24
25
 
25
- spec.add_development_dependency "bundler", "~> 1.10"
26
- spec.add_development_dependency "rake", "~> 10.0"
27
- spec.add_development_dependency "rspec"
26
+ spec.add_development_dependency 'bundler', '~> 2.0'
27
+ spec.add_development_dependency 'rake', '~> 12.0'
28
+ spec.add_development_dependency 'rspec'
28
29
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: plaintext
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Krämer
@@ -10,78 +10,84 @@ authors:
10
10
  autorequire:
11
11
  bindir: exe
12
12
  cert_chain: []
13
- date: 2019-01-16 00:00:00.000000000 Z
13
+ date: 2019-09-03 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
- name: rubyzip
16
+ name: activesupport
17
17
  requirement: !ruby/object:Gem::Requirement
18
18
  requirements:
19
- - - "~>"
19
+ - - ">"
20
20
  - !ruby/object:Gem::Version
21
- version: 1.2.1
21
+ version: 2.2.1
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
25
25
  requirements:
26
- - - "~>"
26
+ - - ">"
27
27
  - !ruby/object:Gem::Version
28
- version: 1.2.1
28
+ version: 2.2.1
29
29
  - !ruby/object:Gem::Dependency
30
30
  name: nokogiri
31
31
  requirement: !ruby/object:Gem::Requirement
32
32
  requirements:
33
33
  - - "~>"
34
34
  - !ruby/object:Gem::Version
35
- version: '1.8'
35
+ version: '1.10'
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: 1.10.4
36
39
  type: :runtime
37
40
  prerelease: false
38
41
  version_requirements: !ruby/object:Gem::Requirement
39
42
  requirements:
40
43
  - - "~>"
41
44
  - !ruby/object:Gem::Version
42
- version: '1.8'
45
+ version: '1.10'
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: 1.10.4
43
49
  - !ruby/object:Gem::Dependency
44
- name: activesupport
50
+ name: rubyzip
45
51
  requirement: !ruby/object:Gem::Requirement
46
52
  requirements:
47
- - - ">"
53
+ - - "~>"
48
54
  - !ruby/object:Gem::Version
49
- version: 2.2.1
55
+ version: 1.2.1
50
56
  type: :runtime
51
57
  prerelease: false
52
58
  version_requirements: !ruby/object:Gem::Requirement
53
59
  requirements:
54
- - - ">"
60
+ - - "~>"
55
61
  - !ruby/object:Gem::Version
56
- version: 2.2.1
62
+ version: 1.2.1
57
63
  - !ruby/object:Gem::Dependency
58
64
  name: bundler
59
65
  requirement: !ruby/object:Gem::Requirement
60
66
  requirements:
61
67
  - - "~>"
62
68
  - !ruby/object:Gem::Version
63
- version: '1.10'
69
+ version: '2.0'
64
70
  type: :development
65
71
  prerelease: false
66
72
  version_requirements: !ruby/object:Gem::Requirement
67
73
  requirements:
68
74
  - - "~>"
69
75
  - !ruby/object:Gem::Version
70
- version: '1.10'
76
+ version: '2.0'
71
77
  - !ruby/object:Gem::Dependency
72
78
  name: rake
73
79
  requirement: !ruby/object:Gem::Requirement
74
80
  requirements:
75
81
  - - "~>"
76
82
  - !ruby/object:Gem::Version
77
- version: '10.0'
83
+ version: '12.0'
78
84
  type: :development
79
85
  prerelease: false
80
86
  version_requirements: !ruby/object:Gem::Requirement
81
87
  requirements:
82
88
  - - "~>"
83
89
  - !ruby/object:Gem::Version
84
- version: '10.0'
90
+ version: '12.0'
85
91
  - !ruby/object:Gem::Dependency
86
92
  name: rspec
87
93
  requirement: !ruby/object:Gem::Requirement
@@ -137,7 +143,8 @@ files:
137
143
  - plaintext.gemspec
138
144
  - plaintext.yml.example
139
145
  homepage: https://github.com/planio-gmbh/plaintext
140
- licenses: []
146
+ licenses:
147
+ - GPL-2.0
141
148
  metadata: {}
142
149
  post_install_message:
143
150
  rdoc_options: []
@@ -154,8 +161,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
154
161
  - !ruby/object:Gem::Version
155
162
  version: '0'
156
163
  requirements: []
157
- rubyforge_project:
158
- rubygems_version: 2.4.5.5
164
+ rubygems_version: 3.0.1
159
165
  signing_key:
160
166
  specification_version: 4
161
167
  summary: Extract plain text from most common office documents.