chupa-text-decomposer-pdf 1.0.8 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 206b91fbb36ea3f1df98fbc331c6216d5bcd4463c08f3fd17eae9084fe111fb0
4
- data.tar.gz: 321fb688ddfd596037bf93b890869a8c7b1795f38a84c938fd3971889d7e443a
3
+ metadata.gz: 3d1885ca60dea6d4d7ffb8b4c3aa1f35dda7a1619577bad2b2526bd7d7db6180
4
+ data.tar.gz: 7d2ccc5f1dd135a5ddf281cf9d2c31215dbcad4943c12f0f03cea32e277a7a2f
5
5
  SHA512:
6
- metadata.gz: 22b2d8cf3b943b1aa42ee4cab84968979fc3b7e902f2d2b62f803bb537a127e2a37d8c218b6002f17d87039dc366b714b5a0b5d2aa6d0ebfd90fcd98bc7954df
7
- data.tar.gz: d5fb58450ac452e7e88e5c3931181c268051df432ddefaa4073eb1cc5baa3af21e1f1615cca2ee94cf3216008d2706856468081c623d6010f7b7e55ff5d473ff
6
+ metadata.gz: 6ab4e283c338c8f6a9157912781efa345c35c2eb39b5ee08cbdd293e9570deddb4654c138dc704af1093e1714657b7392bf63f0685eadd467ca86d480988af31
7
+ data.tar.gz: 2745efd44d1e686b53f49f6569a4d9ebcccd4937fb01ea544d1bbc580d8cefa4f958f9cde3e6bd21453a5e29889340f2662655bd09e9fdb36cc1cb6d55651703
@@ -22,7 +22,7 @@ end
22
22
 
23
23
  Gem::Specification.new do |spec|
24
24
  spec.name = "chupa-text-decomposer-pdf"
25
- spec.version = "1.0.8"
25
+ spec.version = "1.1.0"
26
26
  spec.homepage = "https://github.com/ranguba/chupa-text-decomposer-pdf"
27
27
  spec.authors = ["Kouhei Sutou"]
28
28
  spec.email = ["kou@clear-code.com"]
@@ -39,7 +39,7 @@ Gem::Specification.new do |spec|
39
39
  spec.files += Dir.glob("doc/text/*")
40
40
  spec.files += Dir.glob("test/**/*")
41
41
 
42
- spec.add_runtime_dependency("chupa-text", ">= 1.1.8")
42
+ spec.add_runtime_dependency("chupa-text", ">= 1.1.9")
43
43
  spec.add_runtime_dependency("poppler")
44
44
 
45
45
  spec.add_development_dependency("bundler")
@@ -1,5 +1,13 @@
1
1
  # News
2
2
 
3
+ ## 1.0.9: 2019-03-03
4
+
5
+ ### Improvements
6
+
7
+ * Changed to use log instead of exception for invalid PDF.
8
+
9
+ * Reduced memory usage.
10
+
3
11
  ## 1.0.8: 2019-03-03
4
12
 
5
13
  ### Improvements
@@ -22,6 +22,8 @@ require "poppler"
22
22
  module ChupaText
23
23
  module Decomposers
24
24
  class PDF < Decomposer
25
+ include Loggable
26
+
25
27
  registry.register("pdf", self)
26
28
 
27
29
  def target?(data)
@@ -37,6 +39,8 @@ module ChupaText
37
39
 
38
40
  def decompose(data)
39
41
  document = create_document(data)
42
+ return if document.nil?
43
+
40
44
  text = ""
41
45
  document.each do |page|
42
46
  page_text = page.get_text
@@ -75,7 +79,9 @@ module ChupaText
75
79
  if path.nil?
76
80
  file = Tempfile.new(["chupa-text-decomposer-pdf", ".pdf"])
77
81
  file.binmode
78
- file.write(data.body)
82
+ data.open do |input|
83
+ IO.copy_stream(input, file)
84
+ end
79
85
  file.close
80
86
  path = file.path
81
87
  end
@@ -85,8 +91,14 @@ module ChupaText
85
91
  end
86
92
  rescue Poppler::Error::Encrypted
87
93
  raise ChupaText::EncryptedError.new(data)
88
- rescue GLib::Error => error
89
- raise ChupaText::InvalidDataError.new(data, error.message)
94
+ rescue Poppler::Error => poppler_error
95
+ error do
96
+ message = "#{log_tag} Failed to process PDF: "
97
+ message << "#{poppler_error.class}: #{poppler_error.message}\n"
98
+ message << poppler_error.backtrace.join("\n")
99
+ message
100
+ end
101
+ nil
90
102
  end
91
103
  end
92
104
 
@@ -146,6 +158,10 @@ module ChupaText
146
158
 
147
159
  Screenshot.new("image/png", [png.string].pack("m*"), "base64")
148
160
  end
161
+
162
+ def log_tag
163
+ "[decomposer][pdf]"
164
+ end
149
165
  end
150
166
  end
151
167
  end
@@ -0,0 +1 @@
1
+ %PDF-1
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013-2017 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2019 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -32,6 +32,13 @@ class TestPDF < Test::Unit::TestCase
32
32
  base_path.join(*components)
33
33
  end
34
34
 
35
+ def capture_log(&block)
36
+ ChupaText::CaptureLogger.capture(&block).collect do |level, message|
37
+ message = message.split("\n", 2)[0]
38
+ [level, message]
39
+ end
40
+ end
41
+
35
42
  sub_test_case("target?") do
36
43
  sub_test_case("extension") do
37
44
  def create_data(uri)
@@ -212,5 +219,22 @@ class TestPDF < Test::Unit::TestCase
212
219
  end
213
220
  end
214
221
  end
222
+
223
+ sub_test_case("invalid") do
224
+ def test_empty
225
+ messages = capture_log do
226
+ assert_equal([],
227
+ decompose(fixture_path("empty.pdf")).collect(&:body))
228
+ end
229
+ assert_equal([
230
+ [
231
+ :error,
232
+ "[decomposer][pdf] Failed to process PDF: " +
233
+ "Poppler::Error::Damaged: PDF document is damaged",
234
+ ],
235
+ ],
236
+ messages)
237
+ end
238
+ end
215
239
  end
216
240
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text-decomposer-pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.8
4
+ version: 1.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 1.1.8
19
+ version: 1.1.9
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: 1.1.8
26
+ version: 1.1.9
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: poppler
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -143,6 +143,7 @@ files:
143
143
  - lib/chupa-text/decomposers/pdf.rb
144
144
  - test/fixture/attributes.odt
145
145
  - test/fixture/attributes.pdf
146
+ - test/fixture/empty.pdf
146
147
  - test/fixture/encrypted.odt
147
148
  - test/fixture/encrypted.pdf
148
149
  - test/fixture/multi-pages.odt