chupa-text-decomposer-pdf 1.0.8 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 206b91fbb36ea3f1df98fbc331c6216d5bcd4463c08f3fd17eae9084fe111fb0
4
- data.tar.gz: 321fb688ddfd596037bf93b890869a8c7b1795f38a84c938fd3971889d7e443a
3
+ metadata.gz: 3d1885ca60dea6d4d7ffb8b4c3aa1f35dda7a1619577bad2b2526bd7d7db6180
4
+ data.tar.gz: 7d2ccc5f1dd135a5ddf281cf9d2c31215dbcad4943c12f0f03cea32e277a7a2f
5
5
  SHA512:
6
- metadata.gz: 22b2d8cf3b943b1aa42ee4cab84968979fc3b7e902f2d2b62f803bb537a127e2a37d8c218b6002f17d87039dc366b714b5a0b5d2aa6d0ebfd90fcd98bc7954df
7
- data.tar.gz: d5fb58450ac452e7e88e5c3931181c268051df432ddefaa4073eb1cc5baa3af21e1f1615cca2ee94cf3216008d2706856468081c623d6010f7b7e55ff5d473ff
6
+ metadata.gz: 6ab4e283c338c8f6a9157912781efa345c35c2eb39b5ee08cbdd293e9570deddb4654c138dc704af1093e1714657b7392bf63f0685eadd467ca86d480988af31
7
+ data.tar.gz: 2745efd44d1e686b53f49f6569a4d9ebcccd4937fb01ea544d1bbc580d8cefa4f958f9cde3e6bd21453a5e29889340f2662655bd09e9fdb36cc1cb6d55651703
@@ -22,7 +22,7 @@ end
22
22
 
23
23
  Gem::Specification.new do |spec|
24
24
  spec.name = "chupa-text-decomposer-pdf"
25
- spec.version = "1.0.8"
25
+ spec.version = "1.1.0"
26
26
  spec.homepage = "https://github.com/ranguba/chupa-text-decomposer-pdf"
27
27
  spec.authors = ["Kouhei Sutou"]
28
28
  spec.email = ["kou@clear-code.com"]
@@ -39,7 +39,7 @@ Gem::Specification.new do |spec|
39
39
  spec.files += Dir.glob("doc/text/*")
40
40
  spec.files += Dir.glob("test/**/*")
41
41
 
42
- spec.add_runtime_dependency("chupa-text", ">= 1.1.8")
42
+ spec.add_runtime_dependency("chupa-text", ">= 1.1.9")
43
43
  spec.add_runtime_dependency("poppler")
44
44
 
45
45
  spec.add_development_dependency("bundler")
@@ -1,5 +1,13 @@
1
1
  # News
2
2
 
3
+ ## 1.0.9: 2019-03-03
4
+
5
+ ### Improvements
6
+
7
+ * Changed to use log instead of exception for invalid PDF.
8
+
9
+ * Reduced memory usage.
10
+
3
11
  ## 1.0.8: 2019-03-03
4
12
 
5
13
  ### Improvements
@@ -22,6 +22,8 @@ require "poppler"
22
22
  module ChupaText
23
23
  module Decomposers
24
24
  class PDF < Decomposer
25
+ include Loggable
26
+
25
27
  registry.register("pdf", self)
26
28
 
27
29
  def target?(data)
@@ -37,6 +39,8 @@ module ChupaText
37
39
 
38
40
  def decompose(data)
39
41
  document = create_document(data)
42
+ return if document.nil?
43
+
40
44
  text = ""
41
45
  document.each do |page|
42
46
  page_text = page.get_text
@@ -75,7 +79,9 @@ module ChupaText
75
79
  if path.nil?
76
80
  file = Tempfile.new(["chupa-text-decomposer-pdf", ".pdf"])
77
81
  file.binmode
78
- file.write(data.body)
82
+ data.open do |input|
83
+ IO.copy_stream(input, file)
84
+ end
79
85
  file.close
80
86
  path = file.path
81
87
  end
@@ -85,8 +91,14 @@ module ChupaText
85
91
  end
86
92
  rescue Poppler::Error::Encrypted
87
93
  raise ChupaText::EncryptedError.new(data)
88
- rescue GLib::Error => error
89
- raise ChupaText::InvalidDataError.new(data, error.message)
94
+ rescue Poppler::Error => poppler_error
95
+ error do
96
+ message = "#{log_tag} Failed to process PDF: "
97
+ message << "#{poppler_error.class}: #{poppler_error.message}\n"
98
+ message << poppler_error.backtrace.join("\n")
99
+ message
100
+ end
101
+ nil
90
102
  end
91
103
  end
92
104
 
@@ -146,6 +158,10 @@ module ChupaText
146
158
 
147
159
  Screenshot.new("image/png", [png.string].pack("m*"), "base64")
148
160
  end
161
+
162
+ def log_tag
163
+ "[decomposer][pdf]"
164
+ end
149
165
  end
150
166
  end
151
167
  end
@@ -0,0 +1 @@
1
+ %PDF-1
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2019 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -32,6 +32,13 @@ class TestPDF < Test::Unit::TestCase
32
32
  base_path.join(*components)
33
33
  end
34
34
 
35
+ def capture_log(&block)
36
+ ChupaText::CaptureLogger.capture(&block).collect do |level, message|
37
+ message = message.split("\n", 2)[0]
38
+ [level, message]
39
+ end
40
+ end
41
+
35
42
  sub_test_case("target?") do
36
43
  sub_test_case("extension") do
37
44
  def create_data(uri)
@@ -212,5 +219,22 @@ class TestPDF < Test::Unit::TestCase
212
219
  end
213
220
  end
214
221
  end
222
+
223
+ sub_test_case("invalid") do
224
+ def test_empty
225
+ messages = capture_log do
226
+ assert_equal([],
227
+ decompose(fixture_path("empty.pdf")).collect(&:body))
228
+ end
229
+ assert_equal([
230
+ [
231
+ :error,
232
+ "[decomposer][pdf] Failed to process PDF: " +
233
+ "Poppler::Error::Damaged: PDF document is damaged",
234
+ ],
235
+ ],
236
+ messages)
237
+ end
238
+ end
215
239
  end
216
240
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text-decomposer-pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.8
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 1.1.8
19
+ version: 1.1.9
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 1.1.8
26
+ version: 1.1.9
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: poppler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -143,6 +143,7 @@ files:
143
143
  - lib/chupa-text/decomposers/pdf.rb
144
144
  - test/fixture/attributes.odt
145
145
  - test/fixture/attributes.pdf
146
+ - test/fixture/empty.pdf
146
147
  - test/fixture/encrypted.odt
147
148
  - test/fixture/encrypted.pdf
148
149
  - test/fixture/multi-pages.odt