slaw 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d5870ba16d21c7e3577e3b03f4d361a0afc4f60f
4
- data.tar.gz: 7ce6c9cdee52a9156a1b5a26b1a75a583bae80ab
3
+ metadata.gz: 30ad1ae5e3d2aadfcf89800d91f16f0c0b38620f
4
+ data.tar.gz: c93431db28804db785afda0dae9684bad5a62dbd
5
5
  SHA512:
6
- metadata.gz: 24a30dd3aa44f5fa2416548d995fb52228a53c729b0f9834afe1af93c5623f9f22925e9fd939a6611f9b5869b2ed4531db95e059196d0738891c195e861fa42c
7
- data.tar.gz: 7880f7ff8953864a056a35864a7c4d4a6a6d4573d755d1f1b8deb4b748e2c5e0d791e0926c837d97abc775aef67dc2c667018a05bb90438ac9964bb73a810683
6
+ metadata.gz: 2a0881b4ba4225943bd4b491f7385ba7b70125301c0aed149f70144991c1fec5ee3bdee49d56123848abecd94fd6c9b73f06c15ca66b00e08e50d46dada2f6b3
7
+ data.tar.gz: fa04f689a4a136067fe4b22961d6d6b5597ded5807cddb2b9fa7e620a25982ef31ed830e50df8a30e62300b3e51c79df5478020d74a391262981a41d50ae1311
data/README.md CHANGED
@@ -28,11 +28,16 @@ Or install it with:
28
28
 
29
29
  $ gem install slaw
30
30
 
31
- To run PDF extraction you will also need [xpdf](http://www.foolabs.com/xpdf/).
31
+ To run PDF extraction you will also need [xpdf](http://www.foolabs.com/xpdf/).
32
32
  If you're on a Mac, you can use:
33
33
 
34
34
  brew install xpdf
35
35
 
36
+ You may also need Ghostscript to remove password protection from PDF files. This is
37
+ installed by default on most systems (including Mac). On Ubuntu you can use:
38
+
39
+ sudo apt-get install ghostscript
40
+
36
41
  ## Overview
37
42
 
38
43
  Slaw generates Acts in the [Akoma Ntoso](http://www.akomantoso.org) 2.0 XML
@@ -1,4 +1,5 @@
1
1
  require 'open3'
2
+ require 'tempfile'
2
3
 
3
4
  module Slaw
4
5
  module Extract
@@ -46,14 +47,23 @@ module Slaw
46
47
  #
47
48
  # @return [String] extracted text
48
49
  def extract_from_pdf(filename)
49
- cmd = pdf_to_text_cmd(filename)
50
- logger.info("Executing: #{cmd}")
51
- stdout, status = Open3.capture2(*cmd)
50
+ retried = false
52
51
 
53
- if status == 0
54
- cleanup(stdout)
55
- else
56
- nil
52
+ while true
53
+ cmd = pdf_to_text_cmd(filename)
54
+ logger.info("Executing: #{cmd}")
55
+ stdout, status = Open3.capture2(*cmd)
56
+
57
+ case status.exitstatus
58
+ when 0
59
+ return cleanup(stdout)
60
+ when 3
61
+ return nil if retried
62
+ retried = true
63
+ self.remove_pdf_password(filename)
64
+ else
65
+ return nil
66
+ end
57
67
  end
58
68
  end
59
69
 
@@ -79,6 +89,20 @@ module Slaw
79
89
  text
80
90
  end
81
91
 
92
+ def remove_pdf_password(filename)
93
+ file = Tempfile.new('steno')
94
+ begin
95
+ logger.info("Trying to remove password from #{filename}")
96
+ cmd = "gs -q -dNOPAUSE -dBATCH -sDEVICE=pdfwrite -sOutputFile=#{file.path} -c .setpdfwrite -f #{filename}".split(" ")
97
+ logger.info("Executing: #{cmd}")
98
+ Open3.capture2(*cmd)
99
+ FileUtils.move(file.path, filename)
100
+ ensure
101
+ file.close
102
+ file.unlink
103
+ end
104
+ end
105
+
82
106
  # Get location of the pdftotext executable for all instances.
83
107
  def self.pdftotext_path
84
108
  @@pdftotext_path
data/lib/slaw/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Slaw
2
- VERSION = "0.3.1"
2
+ VERSION = "0.3.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: slaw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Greg Kempe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-20 00:00:00.000000000 Z
11
+ date: 2014-12-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler