chupa-text-decomposer-pdf 1.0.8 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/chupa-text-decomposer-pdf.gemspec +2 -2
- data/doc/text/news.md +8 -0
- data/lib/chupa-text/decomposers/pdf.rb +19 -3
- data/test/fixture/empty.pdf +1 -0
- data/test/test-pdf.rb +25 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3d1885ca60dea6d4d7ffb8b4c3aa1f35dda7a1619577bad2b2526bd7d7db6180
|
4
|
+
data.tar.gz: 7d2ccc5f1dd135a5ddf281cf9d2c31215dbcad4943c12f0f03cea32e277a7a2f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6ab4e283c338c8f6a9157912781efa345c35c2eb39b5ee08cbdd293e9570deddb4654c138dc704af1093e1714657b7392bf63f0685eadd467ca86d480988af31
|
7
|
+
data.tar.gz: 2745efd44d1e686b53f49f6569a4d9ebcccd4937fb01ea544d1bbc580d8cefa4f958f9cde3e6bd21453a5e29889340f2662655bd09e9fdb36cc1cb6d55651703
|
data/chupa-text-decomposer-pdf.gemspec
CHANGED
@@ -22,7 +22,7 @@ end
|
|
22
22
|
|
23
23
|
Gem::Specification.new do |spec|
|
24
24
|
spec.name = "chupa-text-decomposer-pdf"
|
25
|
-
spec.version = "1.0.8"
|
25
|
+
spec.version = "1.1.0"
|
26
26
|
spec.homepage = "https://github.com/ranguba/chupa-text-decomposer-pdf"
|
27
27
|
spec.authors = ["Kouhei Sutou"]
|
28
28
|
spec.email = ["kou@clear-code.com"]
|
@@ -39,7 +39,7 @@ Gem::Specification.new do |spec|
|
|
39
39
|
spec.files += Dir.glob("doc/text/*")
|
40
40
|
spec.files += Dir.glob("test/**/*")
|
41
41
|
|
42
|
-
spec.add_runtime_dependency("chupa-text", ">= 1.1.
|
42
|
+
spec.add_runtime_dependency("chupa-text", ">= 1.1.9")
|
43
43
|
spec.add_runtime_dependency("poppler")
|
44
44
|
|
45
45
|
spec.add_development_dependency("bundler")
|
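data/doc/text/news.md
CHANGED
(diff content for this file is not included in this extract)
data/lib/chupa-text/decomposers/pdf.rb
CHANGED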
@@ -22,6 +22,8 @@ require "poppler"
|
|
22
22
|
module ChupaText
|
23
23
|
module Decomposers
|
24
24
|
class PDF < Decomposer
|
25
|
+
include Loggable
|
26
|
+
|
25
27
|
registry.register("pdf", self)
|
26
28
|
|
27
29
|
def target?(data)
|
@@ -37,6 +39,8 @@ module ChupaText
|
|
37
39
|
|
38
40
|
def decompose(data)
|
39
41
|
document = create_document(data)
|
42
|
+
return if document.nil?
|
43
|
+
|
40
44
|
text = ""
|
41
45
|
document.each do |page|
|
42
46
|
page_text = page.get_text
|
@@ -75,7 +79,9 @@ module ChupaText
|
|
75
79
|
if path.nil?
|
76
80
|
file = Tempfile.new(["chupa-text-decomposer-pdf", ".pdf"])
|
77
81
|
file.binmode
|
78
|
-
|
82
|
+
data.open do |input|
|
83
|
+
IO.copy_stream(input, file)
|
84
|
+
end
|
79
85
|
file.close
|
80
86
|
path = file.path
|
81
87
|
end
|
@@ -85,8 +91,14 @@ module ChupaText
|
|
85
91
|
end
|
86
92
|
rescue Poppler::Error::Encrypted
|
87
93
|
raise ChupaText::EncryptedError.new(data)
|
88
|
-
rescue
|
89
|
-
|
94
|
+
rescue Poppler::Error => poppler_error
|
95
|
+
error do
|
96
|
+
message = "#{log_tag} Failed to process PDF: "
|
97
|
+
message << "#{poppler_error.class}: #{poppler_error.message}\n"
|
98
|
+
message << poppler_error.backtrace.join("\n")
|
99
|
+
message
|
100
|
+
end
|
101
|
+
nil
|
90
102
|
end
|
91
103
|
end
|
92
104
|
|
@@ -146,6 +158,10 @@ module ChupaText
|
|
146
158
|
|
147
159
|
Screenshot.new("image/png", [png.string].pack("m*"), "base64")
|
148
160
|
end
|
161
|
+
|
162
|
+
def log_tag
|
163
|
+
"[decomposer][pdf]"
|
164
|
+
end
|
149
165
|
end
|
150
166
|
end
|
151
167
|
end
|
data/test/fixture/empty.pdf
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
%PDF-1
|
data/test/test-pdf.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Copyright (C) 2013-
|
1
|
+
# Copyright (C) 2013-2019 Kouhei Sutou <kou@clear-code.com>
|
2
2
|
#
|
3
3
|
# This library is free software; you can redistribute it and/or
|
4
4
|
# modify it under the terms of the GNU Lesser General Public
|
@@ -32,6 +32,13 @@ class TestPDF < Test::Unit::TestCase
|
|
32
32
|
base_path.join(*components)
|
33
33
|
end
|
34
34
|
|
35
|
+
def capture_log(&block)
|
36
|
+
ChupaText::CaptureLogger.capture(&block).collect do |level, message|
|
37
|
+
message = message.split("\n", 2)[0]
|
38
|
+
[level, message]
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
35
42
|
sub_test_case("target?") do
|
36
43
|
sub_test_case("extension") do
|
37
44
|
def create_data(uri)
|
@@ -212,5 +219,22 @@ class TestPDF < Test::Unit::TestCase
|
|
212
219
|
end
|
213
220
|
end
|
214
221
|
end
|
222
|
+
|
223
|
+
sub_test_case("invalid") do
|
224
|
+
def test_empty
|
225
|
+
messages = capture_log do
|
226
|
+
assert_equal([],
|
227
|
+
decompose(fixture_path("empty.pdf")).collect(&:body))
|
228
|
+
end
|
229
|
+
assert_equal([
|
230
|
+
[
|
231
|
+
:error,
|
232
|
+
"[decomposer][pdf] Failed to process PDF: " +
|
233
|
+
"Poppler::Error::Damaged: PDF document is damaged",
|
234
|
+
],
|
235
|
+
],
|
236
|
+
messages)
|
237
|
+
end
|
238
|
+
end
|
215
239
|
end
|
216
240
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: chupa-text-decomposer-pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.8
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kouhei Sutou
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 1.1.
|
19
|
+
version: 1.1.9
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: 1.1.
|
26
|
+
version: 1.1.9
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: poppler
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -143,6 +143,7 @@ files:
|
|
143
143
|
- lib/chupa-text/decomposers/pdf.rb
|
144
144
|
- test/fixture/attributes.odt
|
145
145
|
- test/fixture/attributes.pdf
|
146
|
+
- test/fixture/empty.pdf
|
146
147
|
- test/fixture/encrypted.odt
|
147
148
|
- test/fixture/encrypted.pdf
|
148
149
|
- test/fixture/multi-pages.odt
|