RubyGems - pdftohtmlr - Versions diffs - 0.4 → 0.4.1 - Mend

pdftohtmlr 0.4 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

data/Rakefile CHANGED

@@ -55,7 +55,7 @@ spec = Gem::Specification.new do |s|
   s.requirements << 'none'
   s.require_path = 'lib'
   s.autorequire = 'pdftohtml'
+  s.add_dependency("nokogiri", ">= 1.3.3")
   s.files = [ "Rakefile", "README.textile", "MIT-LICENSE" ]
   s.files = s.files + Dir.glob( "lib/**/*" ).delete_if { |item| item.include?( "\.svn" ) }
   s.files = s.files + Dir.glob( "test/**/*" ).delete_if { |item| item.include?( "\.svn" ) || item.include?("\.png") }

data/lib/pdftohtmlr.rb CHANGED

@@ -20,7 +20,7 @@ module PDFToHTMLR
   # Simple local error abstraction
   class PDFToHTMLRError < RuntimeError; end
-  VERSION = '0.4'
+  VERSION = '0.4.1'
   # Provides facilities for converting PDFs to HTML from Ruby code.
   class PdfFile
@@ -28,7 +28,8 @@ module PDFToHTMLR
     attr :target
     attr :user_pwd
     attr :owner_pwd
+    attr :format
     def initialize(input_path, target_path=nil, user_pwd=nil, owner_pwd=nil)
       @path = input_path
       @target = target_path
@@ -40,12 +41,13 @@ module PDFToHTMLR
     def convert()
       errors = ""
       output = ""
       if @user_pwd
-        cmd = "pdftohtml -stdout -upw #{@user_pwd}" + ' "' + @path + '"'
+        cmd = "pdftohtml -stdout #{@format} -upw #{@user_pwd}" + ' "' + @path + '"'
       elsif @owner_pwd
-        cmd = "pdftohtml -stdout -opw #{@owner_pwd}" + ' "' + @path + '"'
+        cmd = "pdftohtml -stdout #{@format} -opw #{@owner_pwd}" + ' "' + @path + '"'
       else
-        cmd = "pdftohtml -stdout" + ' "' + @path + '"'
+        cmd = "pdftohtml -stdout #{@format}" + ' "' + @path + '"'
       end
       output = `#{cmd} 2>&1`
@@ -63,6 +65,16 @@ module PDFToHTMLR
     def convert_to_document()
       Nokogiri::HTML.parse(convert())
     end
+    def convert_to_xml()
+      @format = "-xml"
+      convert()
+    end
+    def convert_to_xml_document()
+      @format = "-xml"
+      Nokogiri::XML.parse(convert())
+    end
   end
   # Handle a string-based local path as input, extends PdfFile

data/test/pdftohtmlr_test.rb CHANGED

@@ -64,6 +64,21 @@ class PdfFileTest < Test::Unit::TestCase
        file.convert_to_document().css('body').first.to_s
   end
+  def test_return_xml
+    file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
+    assert_equal "String", file.convert_to_xml().class.to_s
+  end
+  def test_return_xml_document
+    file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
+    assert_equal "Nokogiri::XML::Document",
+     file.convert_to_xml_document().class.to_s
+    assert_equal Nokogiri::XML.parse(
+        `pdftohtml -stdout -xml "#{TEST_PDF_PATH}"`
+      ).css('text').first.to_s,
+       file.convert_to_document().css('text').first.to_s
+  end
   def test_invalid_URL_pdffile
     e = assert_raise PDFToHTMLRError do
       file = PdfFileUrl.new("blah", ".", nil, nil)

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: pdftohtmlr
 version: !ruby/object:Gem::Version
-  version: "0.4"
+  version: 0.4.1
 platform: ruby
 authors:
 - Kit Plummer
@@ -11,8 +11,17 @@ cert_chain: []
 date: 2009-12-18 00:00:00 -07:00
 default_executable:
-dependencies: []
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  type: :runtime
+  version_requirement:
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.3.3
+    version:
 description: Uses command-line pdftohtml tools to convert PDF files to HTML.
 email: kitplummer@gmail.com
 executables: []