chupa-text-decomposer-pdf 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6e31d3498186c960a9dae07539e83f77ffa5b995
4
- data.tar.gz: 6ecc6033cd1420eb5c7dba3540cf9b4bdca569e9
3
+ metadata.gz: 71901aa054dc878f955620473635ee11df436a2b
4
+ data.tar.gz: 68bbc514ce94a38865e48a27e1849c1baec7df9e
5
5
  SHA512:
6
- metadata.gz: 661698768712b75f81cccef8df76cdd86ea76ef95bb38320d0e713c2a2a0a68cc222c62c64219baad59b72788bcb5ff5e0fee0277ef2f74125c3a6e24d1a6ca6
7
- data.tar.gz: 54d47d4bbd031c02f8774200177b29c5c112a574f812bc68ee43d20019a00e461e8ca4aad955f6c50913e28578656ffdaa12fdca33e04d603df32f9c79662c62
6
+ metadata.gz: 66d265273482895c235c053c5cd69ae92a9510288abcc2a72a8a4f686d9942c15fe6ae4d442d13cf95bb21f5696b81aac68e354c7a023b25da1b292aaece0ade
7
+ data.tar.gz: 649fa65ae2b74fc051896afc161eb91c8db9ad04626af94d287bf32bf8961a0ce7feb521f635ccf043297172d550ce2bb1c59ea826e211c43eb6a8fa84d1a943
@@ -22,7 +22,7 @@ end
22
22
 
23
23
  Gem::Specification.new do |spec|
24
24
  spec.name = "chupa-text-decomposer-pdf"
25
- spec.version = "1.0.1"
25
+ spec.version = "1.0.2"
26
26
  spec.homepage = "https://github.com/ranguba/chupa-text-decomposer-pdf"
27
27
  spec.authors = ["Kouhei Sutou"]
28
28
  spec.email = ["kou@clear-code.com"]
data/doc/text/news.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # News
2
2
 
3
+ ## 1.0.2: 2014-02-18
4
+
5
+ ### Improvements
6
+
7
+ * Supported encrypted PDF. Use `:password` option.
8
+
3
9
  ## 1.0.1: 2014-02-16
4
10
 
5
11
  ### Improvements
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2014 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -29,13 +29,13 @@ module ChupaText
29
29
  end
30
30
 
31
31
  def decompose(data)
32
- document = Poppler::Document.new(data.body)
32
+ document = create_document(data)
33
33
  text = ""
34
34
  document.each do |page|
35
35
  page_text = page.get_text
36
36
  next if page_text.empty?
37
- text << "\n" unless text.empty?
38
37
  text << page_text
38
+ text << "\n" unless page_text.end_with?("\n")
39
39
  end
40
40
  text_data = TextData.new(text)
41
41
  text_data.uri = data.uri
@@ -45,17 +45,52 @@ module ChupaText
45
45
  add_attribute(text_data, document, :keywords)
46
46
  add_attribute(text_data, document, :creator)
47
47
  add_attribute(text_data, document, :producer)
48
- add_attribute(text_data, document, :creation_date)
48
+ add_attribute(text_data, document, :creation_date, :created_time)
49
49
  yield(text_data)
50
50
  end
51
51
 
52
52
  private
53
- def add_attribute(text_data, document, name)
54
- value = document.send(name)
53
+ def create_document(data)
54
+ _password = password(data)
55
+ begin
56
+ wrap_stderr do
57
+ Poppler::Document.new(data.body, _password)
58
+ end
59
+ rescue GLib::Error => error
60
+ case error.code
61
+ when Poppler::Error::ENCRYPTED.to_i
62
+ raise ChupaText::EncryptedError.new(data)
63
+ else
64
+ raise ChupaText::InvalidDataError.new(data, error.message)
65
+ end
66
+ end
67
+ end
68
+
69
+ def password(data)
70
+ password = @options[:password]
71
+ if password.respond_to?(:call)
72
+ password = password.call(data)
73
+ end
74
+ password
75
+ end
76
+
77
+ def wrap_stderr
78
+ stderr = $stderr.dup
79
+ input, output = IO.pipe
80
+ _ = input # TODO: Report output
81
+ $stderr.reopen(output)
82
+ yield
83
+ ensure
84
+ $stderr.reopen(stderr)
85
+ end
86
+
87
+ def add_attribute(text_data, document,
88
+ pdf_attribute_name, data_attribute_name=nil)
89
+ value = document.send(pdf_attribute_name)
55
90
  return if value.nil?
56
- attribute_name = name.to_s.gsub(/_/, "-")
57
91
  value = Time.at(value).utc.iso8601 if value.is_a?(Integer)
58
- text_data[attribute_name] = value
92
+ data_attribute_name ||= pdf_attribute_name.to_s.gsub(/_/, "-")
93
+ text_data[data_attribute_name] = value
59
94
  end
60
95
  end
61
96
  end
Binary file
Binary file
data/test/run-test.rb CHANGED
@@ -18,6 +18,8 @@
18
18
 
19
19
  $VERBOSE = true
20
20
 
21
+ ENV["TZ"] = "JST"
22
+
21
23
  require "bundler/setup"
22
24
 
23
25
  require "test-unit"
data/test/test-pdf.rb CHANGED
@@ -1,4 +1,4 @@
1
- # Copyright (C) 2013 Kouhei Sutou <kou@clear-code.com>
1
+ # Copyright (C) 2013-2014 Kouhei Sutou <kou@clear-code.com>
2
2
  #
3
3
  # This library is free software; you can redistribute it and/or
4
4
  # modify it under the terms of the GNU Lesser General Public
@@ -18,10 +18,14 @@ require "pathname"
18
18
 
19
19
  class TestPDF < Test::Unit::TestCase
20
20
  def setup
21
- @decomposer = ChupaText::Decomposers::PDF.new({})
21
+ @options = {}
22
22
  end
23
23
 
24
24
  private
25
+ def decomposer
26
+ ChupaText::Decomposers::PDF.new(@options)
27
+ end
28
+
25
29
  def fixture_path(*components)
26
30
  base_path = Pathname(__FILE__).dirname + "fixture"
27
31
  base_path.join(*components)
@@ -37,11 +41,11 @@ class TestPDF < Test::Unit::TestCase
37
41
  end
38
42
 
39
43
  def test_pdf
40
- assert_true(@decomposer.target?(create_data("index.pdf")))
44
+ assert_true(decomposer.target?(create_data("index.pdf")))
41
45
  end
42
46
 
43
47
  def test_html
44
- assert_false(@decomposer.target?(create_data("index.html")))
48
+ assert_false(decomposer.target?(create_data("index.html")))
45
49
  end
46
50
  end
47
51
 
@@ -53,11 +57,11 @@ class TestPDF < Test::Unit::TestCase
53
57
  end
54
58
 
55
59
  def test_pdf
56
- assert_true(@decomposer.target?(create_data("application/pdf")))
60
+ assert_true(decomposer.target?(create_data("application/pdf")))
57
61
  end
58
62
 
59
63
  def test_html
60
- assert_false(@decomposer.target?(create_data("text/html")))
64
+ assert_false(decomposer.target?(create_data("text/html")))
61
65
  end
62
66
  end
63
67
  end
@@ -69,7 +73,7 @@ class TestPDF < Test::Unit::TestCase
69
73
  data.mime_type = "text/pdf"
70
74
 
71
75
  decomposed = []
72
- @decomposer.decompose(data) do |decomposed_data|
76
+ decomposer.decompose(data) do |decomposed_data|
73
77
  decomposed << decomposed_data
74
78
  end
75
79
  decomposed
@@ -100,8 +104,9 @@ class TestPDF < Test::Unit::TestCase
100
104
  assert_equal(["LibreOffice 4.1"], decompose("producer"))
101
105
  end
102
106
 
103
- def test_creation_date
104
- assert_equal([nil], decompose("creation_date"))
107
+ def test_created_time
108
+ assert_equal([Time.parse("2014-01-06T00:52:45+09:00")],
109
+ decompose("created_time"))
105
110
  end
106
111
 
107
112
  private
@@ -114,7 +119,7 @@ class TestPDF < Test::Unit::TestCase
114
119
 
115
120
  sub_test_case("one page") do
116
121
  def test_body
117
- assert_equal(["Page1"], decompose.collect(&:body))
122
+ assert_equal(["Page1\n"], decompose.collect(&:body))
118
123
  end
119
124
 
120
125
  private
@@ -125,7 +130,7 @@ class TestPDF < Test::Unit::TestCase
125
130
 
126
131
  sub_test_case("multi pages") do
127
132
  def test_body
128
- assert_equal(["Page1\nPage2"], decompose.collect(&:body))
133
+ assert_equal(["Page1\nPage2\n"], decompose.collect(&:body))
129
134
  end
130
135
 
131
136
  private
@@ -133,5 +138,30 @@ class TestPDF < Test::Unit::TestCase
133
138
  super(fixture_path("multi-pages.pdf"))
134
139
  end
135
140
  end
141
+
142
+ sub_test_case("encrypted") do
143
+ def test_with_password
144
+ @options = {:password => "encrypted"}
145
+ assert_equal(["Password is 'encrypted'.\n"],
146
+ decompose.collect(&:body))
147
+ end
148
+
149
+ def test_with_password_block
150
+ @options = {:password => lambda {|data| "encrypted"}}
151
+ assert_equal(["Password is 'encrypted'.\n"],
152
+ decompose.collect(&:body))
153
+ end
154
+
155
+ def test_without_password
156
+ assert_raise(ChupaText::EncryptedError) do
157
+ decompose
158
+ end
159
+ end
160
+
161
+ private
162
+ def decompose
163
+ super(fixture_path("encrypted.pdf"))
164
+ end
165
+ end
136
166
  end
137
167
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: chupa-text-decomposer-pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.1
4
+ version: 1.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kouhei Sutou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-16 00:00:00.000000000 Z
11
+ date: 2014-02-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: chupa-text
@@ -130,7 +130,9 @@ files:
130
130
  - test/test-pdf.rb
131
131
  - test/fixture/one-page.pdf
132
132
  - test/fixture/multi-pages.pdf
133
+ - test/fixture/encrypted.pdf
133
134
  - test/fixture/attributes.pdf
135
+ - test/fixture/encrypted.odt
134
136
  - test/fixture/one-page.odt
135
137
  - test/fixture/multi-pages.odt
136
138
  - test/fixture/attributes.odt