pdftohtmlr 0.3.1 → 0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +1 -1
- data/lib/pdftohtmlr.rb +10 -15
- data/test/pdftohtmlr_test.rb +4 -4
- metadata +2 -2
data/README.textile
CHANGED
@@ -15,7 +15,7 @@ h1. install
|
|
15
15
|
<pre><code>gem install pdftohtmlr</code></pre>
|
16
16
|
|
17
17
|
h1. using
|
18
|
-
"gist examples":http://gist.github.com/254556
|
18
|
+
"gist examples":http://gist.github.com/254556
|
19
19
|
|
20
20
|
<pre><code lang="ruby">require 'pdftohtmlr'
|
21
21
|
require 'nokogiri'
|
data/lib/pdftohtmlr.rb
CHANGED
@@ -10,7 +10,6 @@
|
|
10
10
|
# License:: MIT
|
11
11
|
|
12
12
|
require 'rubygems'
|
13
|
-
require 'open3'
|
14
13
|
require 'nokogiri'
|
15
14
|
require 'uri'
|
16
15
|
require 'open-uri'
|
@@ -21,7 +20,7 @@ module PDFToHTMLR
|
|
21
20
|
# Simple local error abstraction
|
22
21
|
class PDFToHTMLRError < RuntimeError; end
|
23
22
|
|
24
|
-
VERSION = '0.3.1'
|
23
|
+
VERSION = '0.4'
|
25
24
|
|
26
25
|
# Provides facilities for converting PDFs to HTML from Ruby code.
|
27
26
|
class PdfFile
|
@@ -42,22 +41,19 @@ module PDFToHTMLR
|
|
42
41
|
errors = ""
|
43
42
|
output = ""
|
44
43
|
if @user_pwd
|
45
|
-
cmd = "pdftohtml -stdout -upw #{@user_pwd}
|
44
|
+
cmd = "pdftohtml -stdout -upw #{@user_pwd}" + ' "' + @path + '"'
|
46
45
|
elsif @owner_pwd
|
47
|
-
cmd = "pdftohtml -stdout -opw #{@owner_pwd}
|
46
|
+
cmd = "pdftohtml -stdout -opw #{@owner_pwd}" + ' "' + @path + '"'
|
48
47
|
else
|
49
|
-
cmd = "pdftohtml -stdout
|
48
|
+
cmd = "pdftohtml -stdout" + ' "' + @path + '"'
|
50
49
|
end
|
51
50
|
|
52
|
-
|
53
|
-
stdin.write cmd
|
54
|
-
stdin.close
|
55
|
-
output = stdout.read
|
56
|
-
errors = stderr.read
|
57
|
-
end
|
51
|
+
output = `#{cmd} 2>&1`
|
58
52
|
|
59
|
-
if (
|
60
|
-
raise PDFToHTMLRError,
|
53
|
+
if (output.include?("Error: May not be a PDF file"))
|
54
|
+
raise PDFToHTMLRError, "Error: May not be a PDF file (continuing anyway)"
|
55
|
+
elsif (output.include?("Error:"))
|
56
|
+
raise PDFToHTMLRError, output.split("\n").first.to_s.chomp
|
61
57
|
else
|
62
58
|
return output
|
63
59
|
end
|
@@ -67,7 +63,6 @@ module PDFToHTMLR
|
|
67
63
|
def convert_to_document()
|
68
64
|
Nokogiri::HTML.parse(convert())
|
69
65
|
end
|
70
|
-
|
71
66
|
end
|
72
67
|
|
73
68
|
# Handle a string-based local path as input, extends PdfFile
|
@@ -92,7 +87,7 @@ module PDFToHTMLR
|
|
92
87
|
raise PDFToHTMLRError, "invalid file url"
|
93
88
|
end
|
94
89
|
tempfile = Tempfile.new('pdftohtmlr')
|
95
|
-
File.open(tempfile.path, '
|
90
|
+
File.open(tempfile.path, 'wb') {|f| f.write(open(input_url).read) }
|
96
91
|
super(tempfile.path, target_path, user_pwd, owner_pwd)
|
97
92
|
rescue => bang
|
98
93
|
raise PDFToHTMLRError, bang.to_s
|
data/test/pdftohtmlr_test.rb
CHANGED
@@ -36,7 +36,7 @@ class PdfFileTest < Test::Unit::TestCase
|
|
36
36
|
def test_string_from_pdffile
|
37
37
|
file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
|
38
38
|
assert_equal "String", file.convert().class.to_s
|
39
|
-
assert_equal `pdftohtml -stdout #{TEST_PDF_PATH}`, file.convert()
|
39
|
+
assert_equal `pdftohtml -stdout "#{TEST_PDF_PATH}"`, file.convert()
|
40
40
|
end
|
41
41
|
|
42
42
|
def test_invalid_pwd_pdffile
|
@@ -50,7 +50,7 @@ class PdfFileTest < Test::Unit::TestCase
|
|
50
50
|
def test_valid_pwd_pdffile
|
51
51
|
file = PdfFilePath.new(TEST_PWD_PDF_PATH, ".", "user", nil)
|
52
52
|
assert_equal "String", file.convert().class.to_s
|
53
|
-
assert_equal `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`,
|
53
|
+
assert_equal `pdftohtml -stdout -upw user "#{TEST_PWD_PDF_PATH}"`,
|
54
54
|
file.convert()
|
55
55
|
end
|
56
56
|
|
@@ -59,7 +59,7 @@ class PdfFileTest < Test::Unit::TestCase
|
|
59
59
|
assert_equal "Nokogiri::HTML::Document",
|
60
60
|
file.convert_to_document().class.to_s
|
61
61
|
assert_equal Nokogiri::HTML.parse(
|
62
|
-
`pdftohtml -stdout #{TEST_PDF_PATH}`
|
62
|
+
`pdftohtml -stdout "#{TEST_PDF_PATH}"`
|
63
63
|
).css('body').first.to_s,
|
64
64
|
file.convert_to_document().css('body').first.to_s
|
65
65
|
end
|
@@ -90,7 +90,7 @@ class PdfFileTest < Test::Unit::TestCase
|
|
90
90
|
# http://github.com/kitplummer/pdftohtmlr/raw/master/test/test.pdf
|
91
91
|
file = PdfFileUrl.new(TEST_URL_PDF, ".", nil, nil)
|
92
92
|
assert_equal "String", file.convert().class.to_s
|
93
|
-
assert_equal `pdftohtml -stdout #{TEST_PDF_PATH}`, file.convert()
|
93
|
+
assert_equal `pdftohtml -stdout "#{TEST_PDF_PATH}"`, file.convert()
|
94
94
|
end
|
95
95
|
|
96
96
|
def test_args
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdftohtmlr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: "0.4"
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kit Plummer
|
@@ -9,7 +9,7 @@ autorequire: pdftohtml
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-12-
|
12
|
+
date: 2009-12-18 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
|