pdftohtmlr 0.4 → 0.4.1

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -55,7 +55,7 @@ spec = Gem::Specification.new do |s|
55
55
  s.requirements << 'none'
56
56
  s.require_path = 'lib'
57
57
  s.autorequire = 'pdftohtml'
58
-
58
+ s.add_dependency("nokogiri", ">= 1.3.3")
59
59
  s.files = [ "Rakefile", "README.textile", "MIT-LICENSE" ]
60
60
  s.files = s.files + Dir.glob( "lib/**/*" ).delete_if { |item| item.include?( "\.svn" ) }
61
61
  s.files = s.files + Dir.glob( "test/**/*" ).delete_if { |item| item.include?( "\.svn" ) || item.include?("\.png") }
data/lib/pdftohtmlr.rb CHANGED
@@ -20,7 +20,7 @@ module PDFToHTMLR
20
20
  # Simple local error abstraction
21
21
  class PDFToHTMLRError < RuntimeError; end
22
22
 
23
- VERSION = '0.4'
23
+ VERSION = '0.4.1'
24
24
 
25
25
  # Provides facilities for converting PDFs to HTML from Ruby code.
26
26
  class PdfFile
@@ -28,7 +28,8 @@ module PDFToHTMLR
28
28
  attr :target
29
29
  attr :user_pwd
30
30
  attr :owner_pwd
31
-
31
+ attr :format
32
+
32
33
  def initialize(input_path, target_path=nil, user_pwd=nil, owner_pwd=nil)
33
34
  @path = input_path
34
35
  @target = target_path
@@ -40,12 +41,13 @@ module PDFToHTMLR
40
41
  def convert()
41
42
  errors = ""
42
43
  output = ""
44
+
43
45
  if @user_pwd
44
- cmd = "pdftohtml -stdout -upw #{@user_pwd}" + ' "' + @path + '"'
46
+ cmd = "pdftohtml -stdout #{@format} -upw #{@user_pwd}" + ' "' + @path + '"'
45
47
  elsif @owner_pwd
46
- cmd = "pdftohtml -stdout -opw #{@owner_pwd}" + ' "' + @path + '"'
48
+ cmd = "pdftohtml -stdout #{@format} -opw #{@owner_pwd}" + ' "' + @path + '"'
47
49
  else
48
- cmd = "pdftohtml -stdout" + ' "' + @path + '"'
50
+ cmd = "pdftohtml -stdout #{@format}" + ' "' + @path + '"'
49
51
  end
50
52
 
51
53
  output = `#{cmd} 2>&1`
@@ -63,6 +65,16 @@ module PDFToHTMLR
63
65
  def convert_to_document()
64
66
  Nokogiri::HTML.parse(convert())
65
67
  end
68
+
69
+ def convert_to_xml()
70
+ @format = "-xml"
71
+ convert()
72
+ end
73
+
74
+ def convert_to_xml_document()
75
+ @format = "-xml"
76
+ Nokogiri::XML.parse(convert())
77
+ end
66
78
  end
67
79
 
68
80
  # Handle a string-based local path as input, extends PdfFile
@@ -64,6 +64,21 @@ class PdfFileTest < Test::Unit::TestCase
64
64
  file.convert_to_document().css('body').first.to_s
65
65
  end
66
66
 
67
+ def test_return_xml
68
+ file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
69
+ assert_equal "String", file.convert_to_xml().class.to_s
70
+ end
71
+
72
+ def test_return_xml_document
73
+ file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
74
+ assert_equal "Nokogiri::XML::Document",
75
+ file.convert_to_xml_document().class.to_s
76
+ assert_equal Nokogiri::XML.parse(
77
+ `pdftohtml -stdout -xml "#{TEST_PDF_PATH}"`
78
+ ).css('text').first.to_s,
79
+ file.convert_to_document().css('text').first.to_s
80
+ end
81
+
67
82
  def test_invalid_URL_pdffile
68
83
  e = assert_raise PDFToHTMLRError do
69
84
  file = PdfFileUrl.new("blah", ".", nil, nil)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdftohtmlr
3
3
  version: !ruby/object:Gem::Version
4
- version: "0.4"
4
+ version: 0.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kit Plummer
@@ -11,8 +11,17 @@ cert_chain: []
11
11
 
12
12
  date: 2009-12-18 00:00:00 -07:00
13
13
  default_executable:
14
- dependencies: []
15
-
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.3.3
24
+ version:
16
25
  description: Uses command-line pdftohtml tools to convert PDF files to HTML.
17
26
  email: kitplummer@gmail.com
18
27
  executables: []