pdftohtmlr 0.4 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/lib/pdftohtmlr.rb +17 -5
- data/test/pdftohtmlr_test.rb +15 -0
- metadata +12 -3
data/Rakefile
CHANGED
@@ -55,7 +55,7 @@ spec = Gem::Specification.new do |s|
|
|
55
55
|
s.requirements << 'none'
|
56
56
|
s.require_path = 'lib'
|
57
57
|
s.autorequire = 'pdftohtml'
|
58
|
-
|
58
|
+
s.add_dependency("nokogiri", ">= 1.3.3")
|
59
59
|
s.files = [ "Rakefile", "README.textile", "MIT-LICENSE" ]
|
60
60
|
s.files = s.files + Dir.glob( "lib/**/*" ).delete_if { |item| item.include?( "\.svn" ) }
|
61
61
|
s.files = s.files + Dir.glob( "test/**/*" ).delete_if { |item| item.include?( "\.svn" ) || item.include?("\.png") }
|
data/lib/pdftohtmlr.rb
CHANGED
@@ -20,7 +20,7 @@ module PDFToHTMLR
|
|
20
20
|
# Simple local error abstraction
|
21
21
|
class PDFToHTMLRError < RuntimeError; end
|
22
22
|
|
23
|
-
VERSION = '0.4'
|
23
|
+
VERSION = '0.4.1'
|
24
24
|
|
25
25
|
# Provides facilities for converting PDFs to HTML from Ruby code.
|
26
26
|
class PdfFile
|
@@ -28,7 +28,8 @@ module PDFToHTMLR
|
|
28
28
|
attr :target
|
29
29
|
attr :user_pwd
|
30
30
|
attr :owner_pwd
|
31
|
-
|
31
|
+
attr :format
|
32
|
+
|
32
33
|
def initialize(input_path, target_path=nil, user_pwd=nil, owner_pwd=nil)
|
33
34
|
@path = input_path
|
34
35
|
@target = target_path
|
@@ -40,12 +41,13 @@ module PDFToHTMLR
|
|
40
41
|
def convert()
|
41
42
|
errors = ""
|
42
43
|
output = ""
|
44
|
+
|
43
45
|
if @user_pwd
|
44
|
-
cmd = "pdftohtml -stdout -upw #{@user_pwd}" + ' "' + @path + '"'
|
46
|
+
cmd = "pdftohtml -stdout #{@format} -upw #{@user_pwd}" + ' "' + @path + '"'
|
45
47
|
elsif @owner_pwd
|
46
|
-
cmd = "pdftohtml -stdout -opw #{@owner_pwd}" + ' "' + @path + '"'
|
48
|
+
cmd = "pdftohtml -stdout #{@format} -opw #{@owner_pwd}" + ' "' + @path + '"'
|
47
49
|
else
|
48
|
-
cmd = "pdftohtml -stdout" + ' "' + @path + '"'
|
50
|
+
cmd = "pdftohtml -stdout #{@format}" + ' "' + @path + '"'
|
49
51
|
end
|
50
52
|
|
51
53
|
output = `#{cmd} 2>&1`
|
@@ -63,6 +65,16 @@ module PDFToHTMLR
|
|
63
65
|
def convert_to_document()
|
64
66
|
Nokogiri::HTML.parse(convert())
|
65
67
|
end
|
68
|
+
|
69
|
+
def convert_to_xml()
|
70
|
+
@format = "-xml"
|
71
|
+
convert()
|
72
|
+
end
|
73
|
+
|
74
|
+
def convert_to_xml_document()
|
75
|
+
@format = "-xml"
|
76
|
+
Nokogiri::XML.parse(convert())
|
77
|
+
end
|
66
78
|
end
|
67
79
|
|
68
80
|
# Handle a string-based local path as input, extends PdfFile
|
data/test/pdftohtmlr_test.rb
CHANGED
@@ -64,6 +64,21 @@ class PdfFileTest < Test::Unit::TestCase
|
|
64
64
|
file.convert_to_document().css('body').first.to_s
|
65
65
|
end
|
66
66
|
|
67
|
+
def test_return_xml
|
68
|
+
file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
|
69
|
+
assert_equal "String", file.convert_to_xml().class.to_s
|
70
|
+
end
|
71
|
+
|
72
|
+
def test_return_xml_document
|
73
|
+
file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
|
74
|
+
assert_equal "Nokogiri::XML::Document",
|
75
|
+
file.convert_to_xml_document().class.to_s
|
76
|
+
assert_equal Nokogiri::XML.parse(
|
77
|
+
`pdftohtml -stdout -xml "#{TEST_PDF_PATH}"`
|
78
|
+
).css('text').first.to_s,
|
79
|
+
file.convert_to_document().css('text').first.to_s
|
80
|
+
end
|
81
|
+
|
67
82
|
def test_invalid_URL_pdffile
|
68
83
|
e = assert_raise PDFToHTMLRError do
|
69
84
|
file = PdfFileUrl.new("blah", ".", nil, nil)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdftohtmlr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kit Plummer
|
@@ -11,8 +11,17 @@ cert_chain: []
|
|
11
11
|
|
12
12
|
date: 2009-12-18 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
|
-
dependencies:
|
15
|
-
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: nokogiri
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.3.3
|
24
|
+
version:
|
16
25
|
description: Uses command-line pdftohtml tools to convert PDF files to HTML.
|
17
26
|
email: kitplummer@gmail.com
|
18
27
|
executables: []
|