pdftohtmlr 0.4 → 0.4.1
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/lib/pdftohtmlr.rb +17 -5
- data/test/pdftohtmlr_test.rb +15 -0
- metadata +12 -3
data/Rakefile
CHANGED
@@ -55,7 +55,7 @@ spec = Gem::Specification.new do |s|
|
|
55
55
|
s.requirements << 'none'
|
56
56
|
s.require_path = 'lib'
|
57
57
|
s.autorequire = 'pdftohtml'
|
58
|
-
|
58
|
+
s.add_dependency("nokogiri", ">= 1.3.3")
|
59
59
|
s.files = [ "Rakefile", "README.textile", "MIT-LICENSE" ]
|
60
60
|
s.files = s.files + Dir.glob( "lib/**/*" ).delete_if { |item| item.include?( "\.svn" ) }
|
61
61
|
s.files = s.files + Dir.glob( "test/**/*" ).delete_if { |item| item.include?( "\.svn" ) || item.include?("\.png") }
|
data/lib/pdftohtmlr.rb
CHANGED
@@ -20,7 +20,7 @@ module PDFToHTMLR
|
|
20
20
|
# Simple local error abstraction
|
21
21
|
class PDFToHTMLRError < RuntimeError; end
|
22
22
|
|
23
|
-
VERSION = '0.4'
|
23
|
+
VERSION = '0.4.1'
|
24
24
|
|
25
25
|
# Provides facilities for converting PDFs to HTML from Ruby code.
|
26
26
|
class PdfFile
|
@@ -28,7 +28,8 @@ module PDFToHTMLR
|
|
28
28
|
attr :target
|
29
29
|
attr :user_pwd
|
30
30
|
attr :owner_pwd
|
31
|
-
|
31
|
+
attr :format
|
32
|
+
|
32
33
|
def initialize(input_path, target_path=nil, user_pwd=nil, owner_pwd=nil)
|
33
34
|
@path = input_path
|
34
35
|
@target = target_path
|
@@ -40,12 +41,13 @@ module PDFToHTMLR
|
|
40
41
|
def convert()
|
41
42
|
errors = ""
|
42
43
|
output = ""
|
44
|
+
|
43
45
|
if @user_pwd
|
44
|
-
cmd = "pdftohtml -stdout -upw #{@user_pwd}" + ' "' + @path + '"'
|
46
|
+
cmd = "pdftohtml -stdout #{@format} -upw #{@user_pwd}" + ' "' + @path + '"'
|
45
47
|
elsif @owner_pwd
|
46
|
-
cmd = "pdftohtml -stdout -opw #{@owner_pwd}" + ' "' + @path + '"'
|
48
|
+
cmd = "pdftohtml -stdout #{@format} -opw #{@owner_pwd}" + ' "' + @path + '"'
|
47
49
|
else
|
48
|
-
cmd = "pdftohtml -stdout" + ' "' + @path + '"'
|
50
|
+
cmd = "pdftohtml -stdout #{@format}" + ' "' + @path + '"'
|
49
51
|
end
|
50
52
|
|
51
53
|
output = `#{cmd} 2>&1`
|
@@ -63,6 +65,16 @@ module PDFToHTMLR
|
|
63
65
|
def convert_to_document()
|
64
66
|
Nokogiri::HTML.parse(convert())
|
65
67
|
end
|
68
|
+
|
69
|
+
def convert_to_xml()
|
70
|
+
@format = "-xml"
|
71
|
+
convert()
|
72
|
+
end
|
73
|
+
|
74
|
+
def convert_to_xml_document()
|
75
|
+
@format = "-xml"
|
76
|
+
Nokogiri::XML.parse(convert())
|
77
|
+
end
|
66
78
|
end
|
67
79
|
|
68
80
|
# Handle a string-based local path as input, extends PdfFile
|
data/test/pdftohtmlr_test.rb
CHANGED
@@ -64,6 +64,21 @@ class PdfFileTest < Test::Unit::TestCase
|
|
64
64
|
file.convert_to_document().css('body').first.to_s
|
65
65
|
end
|
66
66
|
|
67
|
+
def test_return_xml
|
68
|
+
file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
|
69
|
+
assert_equal "String", file.convert_to_xml().class.to_s
|
70
|
+
end
|
71
|
+
|
72
|
+
def test_return_xml_document
|
73
|
+
file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
|
74
|
+
assert_equal "Nokogiri::XML::Document",
|
75
|
+
file.convert_to_xml_document().class.to_s
|
76
|
+
assert_equal Nokogiri::XML.parse(
|
77
|
+
`pdftohtml -stdout -xml "#{TEST_PDF_PATH}"`
|
78
|
+
).css('text').first.to_s,
|
79
|
+
file.convert_to_document().css('text').first.to_s
|
80
|
+
end
|
81
|
+
|
67
82
|
def test_invalid_URL_pdffile
|
68
83
|
e = assert_raise PDFToHTMLRError do
|
69
84
|
file = PdfFileUrl.new("blah", ".", nil, nil)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdftohtmlr
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 0.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kit Plummer
|
@@ -11,8 +11,17 @@ cert_chain: []
|
|
11
11
|
|
12
12
|
date: 2009-12-18 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
|
-
dependencies:
|
15
|
-
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: nokogiri
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.3.3
|
24
|
+
version:
|
16
25
|
description: Uses command-line pdftohtml tools to convert PDF files to HTML.
|
17
26
|
email: kitplummer@gmail.com
|
18
27
|
executables: []
|