chupa-text-decomposer-pdf 1.0.1 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6e31d3498186c960a9dae07539e83f77ffa5b995
4
- data.tar.gz: 6ecc6033cd1420eb5c7dba3540cf9b4bdca569e9
3
+ metadata.gz: 71901aa054dc878f955620473635ee11df436a2b
4
+ data.tar.gz: 68bbc514ce94a38865e48a27e1849c1baec7df9e
5
5
  SHA512:
6
- metadata.gz: 661698768712b75f81cccef8df76cdd86ea76ef95bb38320d0e713c2a2a0a68cc222c62c64219baad59b72788bcb5ff5e0fee0277ef2f74125c3a6e24d1a6ca6
7
- data.tar.gz: 54d47d4bbd031c02f8774200177b29c5c112a574f812bc68ee43d20019a00e461e8ca4aad955f6c50913e28578656ffdaa12fdca33e04d603df32f9c79662c62
6
+ metadata.gz: 66d265273482895c235c053c5cd69ae92a9510288abcc2a72a8a4f686d9942c15fe6ae4d442d13cf95bb21f5696b81aac68e354c7a023b25da1b292aaece0ade
7
+ data.tar.gz: 649fa65ae2b74fc051896afc161eb91c8db9ad04626af94d287bf32bf8961a0ce7feb521f635ccf043297172d550ce2bb1c59ea826e211c43eb6a8fa84d1a943
@@ -22,7 +22,7 @@ end
22
22
 
23
23
  Gem::Specification.new do |spec|
24
24
  spec.name = "chupa-text-decomposer-pdf"
25
- spec.version = "1.0.1"
25
+ spec.version = "1.0.2"
26
26
  spec.homepage = "https://github.com/ranguba/chupa-text-decomposer-pdf"
27
27
  spec.authors = ["Kouhei Sutou"]
28
28
  spec.email = ["kou@clear-code.com"]
data/doc/text/news.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # News
2
2
 
3
+ ## 1.0.2: 2014-02-18
4
+
5
+ ### Improvements
6
+
7
+ * Supported encrypted PDF. Use `:password` option.
8
+
3
9
  ## 1.0.1: 2014-02-16
4
10
 
5
11
  ### Improvements
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2014 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -29,13 +29,13 @@ module ChupaText
29
29
  end
30
30
 
31
31
  def decompose(data)
32
- document = Poppler::Document.new(data.body)
32
+ document = create_document(data)
33
33
  text = ""
34
34
  document.each do |page|
35
35
  page_text = page.get_text
36
36
  next if page_text.empty?
37
- text << "\n" unless text.empty?
38
37
  text << page_text
38
+ text << "\n" unless page_text.end_with?("\n")
39
39
  end
40
40
  text_data = TextData.new(text)
41
41
  text_data.uri = data.uri
@@ -45,17 +45,52 @@ module ChupaText
45
45
  add_attribute(text_data, document, :keywords)
46
46
  add_attribute(text_data, document, :creator)
47
47
  add_attribute(text_data, document, :producer)
48
- add_attribute(text_data, document, :creation_date)
48
+ add_attribute(text_data, document, :creation_date, :created_time)
49
49
  yield(text_data)
50
50
  end
51
51
 
52
52
  private
53
- def add_attribute(text_data, document, name)
54
- value = document.send(name)
53
+ def create_document(data)
54
+ _password = password(data)
55
+ begin
56
+ wrap_stderr do
57
+ Poppler::Document.new(data.body, _password)
58
+ end
59
+ rescue GLib::Error => error
60
+ case error.code
61
+ when Poppler::Error::ENCRYPTED.to_i
62
+ raise ChupaText::EncryptedError.new(data)
63
+ else
64
+ raise ChupaText::InvalidDataError.new(data, error.message)
65
+ end
66
+ end
67
+ end
68
+
69
+ def password(data)
70
+ password = @options[:password]
71
+ if password.respond_to?(:call)
72
+ password = password.call(data)
73
+ end
74
+ password
75
+ end
76
+
77
+ def wrap_stderr
78
+ stderr = $stderr.dup
79
+ input, output = IO.pipe
80
+ _ = input # TODO: Report output
81
+ $stderr.reopen(output)
82
+ yield
83
+ ensure
84
+ $stderr.reopen(stderr)
85
+ end
86
+
87
+ def add_attribute(text_data, document,
88
+ pdf_attribute_name, data_attribute_name=nil)
89
+ value = document.send(pdf_attribute_name)
55
90
  return if value.nil?
56
- attribute_name = name.to_s.gsub(/_/, "-")
57
91
  value = Time.at(value).utc.iso8601 if value.is_a?(Integer)
58
- text_data[attribute_name] = value
92
+ data_attribute_name ||= pdf_attribute_name.to_s.gsub(/_/, "-")
93
+ text_data[data_attribute_name] = value
59
94
  end
60
95
  end
61
96
  end
Binary file
Binary file
data/test/run-test.rb CHANGED
@@ -18,6 +18,8 @@
18
18
 
19
19
  $VERBOSE = true
20
20
 
21
+ ENV["TZ"] = "JST"
22
+
21
23
  require "bundler/setup"
22
24
 
23
25
  require "test-unit"
data/test/test-pdf.rb CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2014 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -18,10 +18,14 @@ require "pathname"
18
18
 
19
19
  class TestPDF < Test::Unit::TestCase
20
20
  def setup
21
- @decomposer = ChupaText::Decomposers::PDF.new({})
21
+ @options = {}
22
22
  end
23
23
 
24
24
  private
25
+ def decomposer
26
+ ChupaText::Decomposers::PDF.new(@options)
27
+ end
28
+
25
29
  def fixture_path(*components)
26
30
  base_path = Pathname(__FILE__).dirname + "fixture"
27
31
  base_path.join(*components)
@@ -37,11 +41,11 @@ class TestPDF < Test::Unit::TestCase
37
41
  end
38
42
 
39
43
  def test_pdf
40
- assert_true(@decomposer.target?(create_data("index.pdf")))
44
+ assert_true(decomposer.target?(create_data("index.pdf")))
41
45
  end
42
46
 
43
47
  def test_html
44
- assert_false(@decomposer.target?(create_data("index.html")))
48
+ assert_false(decomposer.target?(create_data("index.html")))
45
49
  end
46
50
  end
47
51
 
@@ -53,11 +57,11 @@ class TestPDF < Test::Unit::TestCase
53
57
  end
54
58
 
55
59
  def test_pdf
56
- assert_true(@decomposer.target?(create_data("application/pdf")))
60
+ assert_true(decomposer.target?(create_data("application/pdf")))
57
61
  end
58
62
 
59
63
  def test_html
60
- assert_false(@decomposer.target?(create_data("text/html")))
64
+ assert_false(decomposer.target?(create_data("text/html")))
61
65
  end
62
66
  end
63
67
  end
@@ -69,7 +73,7 @@ class TestPDF < Test::Unit::TestCase
69
73
  data.mime_type = "text/pdf"
70
74
 
71
75
  decomposed = []
72
- @decomposer.decompose(data) do |decomposed_data|
76
+ decomposer.decompose(data) do |decomposed_data|
73
77
  decomposed << decomposed_data
74
78
  end
75
79
  decomposed
@@ -100,8 +104,9 @@ class TestPDF < Test::Unit::TestCase
100
104
  assert_equal(["LibreOffice 4.1"], decompose("producer"))
101
105
  end
102
106
 
103
- def test_creation_date
104
- assert_equal([nil], decompose("creation_date"))
107
+ def test_created_time
108
+ assert_equal([Time.parse("2014-01-06T00:52:45+09:00")],
109
+ decompose("created_time"))
105
110
  end
106
111
 
107
112
  private
@@ -114,7 +119,7 @@ class TestPDF < Test::Unit::TestCase
114
119
 
115
120
  sub_test_case("one page") do
116
121
  def test_body
117
- assert_equal(["Page1"], decompose.collect(&:body))
122
+ assert_equal(["Page1\n"], decompose.collect(&:body))
118
123
  end
119
124
 
120
125
  private
@@ -125,7 +130,7 @@ class TestPDF < Test::Unit::TestCase
125
130
 
126
131
  sub_test_case("multi pages") do
127
132
  def test_body
128
- assert_equal(["Page1\nPage2"], decompose.collect(&:body))
133
+ assert_equal(["Page1\nPage2\n"], decompose.collect(&:body))
129
134
  end
130
135
 
131
136
  private
@@ -133,5 +138,30 @@ class TestPDF < Test::Unit::TestCase
133
138
  super(fixture_path("multi-pages.pdf"))
134
139
  end
135
140
  end
141
+
142
+ sub_test_case("encrypted") do
143
+ def test_with_password
144
+ @options = {:password => "encrypted"}
145
+ assert_equal(["Password is 'encrypted'.\n"],
146
+ decompose.collect(&:body))
147
+ end
148
+
149
+ def test_with_password_block
150
+ @options = {:password => lambda {|data| "encrypted"}}
151
+ assert_equal(["Password is 'encrypted'.\n"],
152
+ decompose.collect(&:body))
153
+ end
154
+
155
+ def test_without_password
156
+ assert_raise(ChupaText::EncryptedError) do
157
+ decompose
158
+ end
159
+ end
160
+
161
+ private
162
+ def decompose
163
+ super(fixture_path("encrypted.pdf"))
164
+ end
165
+ end
136
166
  end
137
167
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text-decomposer-pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-16 00:00:00.000000000 Z
11
+ date: 2014-02-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: chupa-text
@@ -130,7 +130,9 @@ files:
130
130
  - test/test-pdf.rb
131
131
  - test/fixture/one-page.pdf
132
132
  - test/fixture/multi-pages.pdf
133
+ - test/fixture/encrypted.pdf
133
134
  - test/fixture/attributes.pdf
135
+ - test/fixture/encrypted.odt
134
136
  - test/fixture/one-page.odt
135
137
  - test/fixture/multi-pages.odt
136
138
  - test/fixture/attributes.odt