RubyGems - pdftohtmlr - Versions diffs - 0.3.0 → 0.3.1 - Mend

pdftohtmlr 0.3.0 → 0.3.1

Files changed (5) hide show

data/README.textile CHANGED Viewed

@@ -15,15 +15,16 @@ h1. install
 <pre><code>gem install pdftohtmlr</code></pre>
 h1. using
+"gist examples":http://gist.github.com/254556.js?file=pdftohtmlr_example.rb
 <pre><code lang="ruby">require 'pdftohtmlr'
 require 'nokogiri'
-file = PdfFile.new([Path to Source PDF],
-                   [Target File (not implemented yet)],
-                   [user password],
-                   [owner password])
+file = PdfFilePath.new([Path to Source PDF])
 string = file.convert
 doc = file.convert_to_document()</code></pre>
+See included test cases for more usage examples, including passwords and URL fetching.
 h1. license
-MIT
+MIT (See included MIT-LICENSE)

data/Rakefile CHANGED Viewed

@@ -18,7 +18,7 @@ desc "Clean generated files"
 task :clean do
   rm FileList['test/output/*.png']
   rm_rf 'pkg'
-  rm_rf 'doc'
+  rm_rf 'rdoc'
 end
 desc 'Test the pdftohtmlr gem.'
@@ -32,8 +32,8 @@ desc 'Generate documentation for the pdftohtmlr gem.'
 Rake::RDocTask.new(:rdoc) do |rdoc|
   rdoc.rdoc_dir = 'rdoc'
   rdoc.title    = 'pdftohtmlr'
-  rdoc.options << '--line-numbers --inline-source'
-  rdoc.rdoc_files.include('README')
+  rdoc.options << '--line-numbers'
+  rdoc.rdoc_files.include('README.textile')
   rdoc.rdoc_files.include('lib/**/*.rb')
 end

data/lib/pdftohtmlr.rb CHANGED Viewed

@@ -12,13 +12,16 @@
 require 'rubygems'
 require 'open3'
 require 'nokogiri'
+require 'uri'
+require 'open-uri'
+require 'tempfile'
 module PDFToHTMLR
   # Simple local error abstraction
   class PDFToHTMLRError < RuntimeError; end
-  VERSION = '0.3.0'
+  VERSION = '0.3.1'
   # Provides facilities for converting PDFs to HTML from Ruby code.
   class PdfFile
@@ -27,17 +30,11 @@ module PDFToHTMLR
     attr :user_pwd
     attr :owner_pwd
-    def initialize(input_path, target_path, user_pwd, owner_pwd)
+    def initialize(input_path, target_path=nil, user_pwd=nil, owner_pwd=nil)
       @path = input_path
       @target = target_path
       @user_pwd = user_pwd
-      @owner_pwd = owner_pwd
-      # check to make sure file is legit
-      if (!File.exist?(@path))
-        raise PDFToHTMLRError, "invalid file path"
-      end
+      @owner_pwd = owner_pwd
     end
     # Convert the PDF document to HTML.  Returns a string
@@ -60,7 +57,7 @@ module PDFToHTMLR
       end
       if (errors != "")
-        raise PDFToHTMLRError, errors.to_s
+        raise PDFToHTMLRError, errors.first.to_s.chomp
       else
         return output
       end
@@ -72,4 +69,34 @@ module PDFToHTMLR
     end
   end
+  # Handle a string-based local path as input, extends PdfFile
+  class PdfFilePath < PdfFile
+    def initialize(input_path, target_path=nil, user_pwd=nil, owner_pwd=nil)
+      # check to make sure file is legit
+      if (!File.exist?(input_path))
+        raise PDFToHTMLRError, "invalid file path"
+      end
+      super(input_path, target_path, user_pwd, owner_pwd)
+    end
+  end
+  # Handle a URI as a remote path to a PDF, extends PdfFile
+  class PdfFileUrl < PdfFile
+    def initialize(input_url, target_path=nil, user_pwd=nil, owner_pwd=nil)
+      # check to make sure file is legit
+      begin
+        if ((input_url =~ URI::regexp).nil?)
+          raise PDFToHTMLRError, "invalid file url"
+        end
+        tempfile = Tempfile.new('pdftohtmlr')
+        File.open(tempfile.path, 'w') {|f| f.write(open(input_url).read) }
+        super(tempfile.path, target_path, user_pwd, owner_pwd)
+      rescue => bang
+        raise PDFToHTMLRError, bang.to_s
+      end
+    end
+  end
 end

data/test/pdftohtmlr_test.rb CHANGED Viewed

@@ -9,53 +9,92 @@ class PdfFileTest < Test::Unit::TestCase
   TEST_PWD_PDF_PATH = CURRENT_DIR + "test_pw.pdf"
   TEST_BAD_PATH = "blah.pdf"
   TEST_NON_PDF = CURRENT_DIR + "pdftohtmlr_test.rb"
+  TEST_URL_PDF =
+   "http://github.com/kitplummer/pdftohtmlr/raw/master/test/test.pdf"
+  TEST_URL_NON_PDF =
+   "http://github.com/kitplummer/pdftohtmlr/raw/master/test/pdftohtmlr_test.rb"
   def test_pdffile_new
-    file = PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
+    file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
     assert file
   end
   def test_invalid_pdffile
     e = assert_raise PDFToHTMLRError do
-      file = PdfFile.new(TEST_NON_PDF, ".", nil, nil)
+      file = PdfFilePath.new(TEST_NON_PDF, ".", nil, nil)
       file.convert
     end
+    assert_equal "Error: May not be a PDF file (continuing anyway)", e.to_s
   end
   def test_bad_pdffile_new
-    assert_raise PDFToHTMLRError do
-      file = PdfFile.new(TEST_BAD_PATH, ".", nil, nil)
+    e = assert_raise PDFToHTMLRError do
+      file = PdfFilePath.new(TEST_BAD_PATH, ".", nil, nil)
     end
+    assert_equal "invalid file path", e.to_s
   end
   def test_string_from_pdffile
-    file = PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
+    file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
     assert_equal "String", file.convert().class.to_s
     assert_equal `pdftohtml -stdout #{TEST_PDF_PATH}`, file.convert()
   end
   def test_invalid_pwd_pdffile
-    assert_raise PDFToHTMLRError do
-      file = PdfFile.new(TEST_PWD_PDF_PATH, ".", "blah", nil)
+    e = assert_raise PDFToHTMLRError do
+      file = PdfFilePath.new(TEST_PWD_PDF_PATH, ".", "blah", nil)
       file.convert
     end
+    assert_equal "Error: Incorrect password", e.to_s
   end
   def test_valid_pwd_pdffile
-    file = PdfFile.new(TEST_PWD_PDF_PATH, ".", "user", nil)
+    file = PdfFilePath.new(TEST_PWD_PDF_PATH, ".", "user", nil)
     assert_equal "String", file.convert().class.to_s
     assert_equal `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`,
     file.convert()
   end
   def test_return_document
-    file = PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
+    file = PdfFilePath.new(TEST_PDF_PATH, ".", nil, nil)
     assert_equal "Nokogiri::HTML::Document",
      file.convert_to_document().class.to_s
     assert_equal Nokogiri::HTML.parse(
-        `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`
+        `pdftohtml -stdout #{TEST_PDF_PATH}`
       ).css('body').first.to_s,
        file.convert_to_document().css('body').first.to_s
   end
+  def test_invalid_URL_pdffile
+    e = assert_raise PDFToHTMLRError do
+      file = PdfFileUrl.new("blah", ".", nil, nil)
+    end
+    assert_equal "invalid file url", e.to_s
+  end
+  def test_invalid_URL_resource_pdffile
+    e = assert_raise PDFToHTMLRError do
+      file = PdfFileUrl.new("http://github.com/kitplummer/blah", ".", nil, nil)
+    end
+    assert_equal "404 Not Found", e.to_s
+  end
+  def test_invalid_URL_pdf_pdffile
+    e = assert_raise PDFToHTMLRError do
+      file = PdfFileUrl.new(TEST_URL_NON_PDF, ".", nil, nil)
+      file.convert
+    end
+    assert_equal "Error: May not be a PDF file (continuing anyway)", e.to_s
+  end
+  def test_valid_URL_pdffile
+    # http://github.com/kitplummer/pdftohtmlr/raw/master/test/test.pdf
+    file = PdfFileUrl.new(TEST_URL_PDF, ".", nil, nil)
+    assert_equal "String", file.convert().class.to_s
+    assert_equal `pdftohtml -stdout #{TEST_PDF_PATH}`, file.convert()
+  end
+  def test_args
+    file = PdfFileUrl.new(TEST_URL_PDF)
+    assert_equal "String", file.convert().class.to_s
+  end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: pdftohtmlr
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.3.1
 platform: ruby
 authors:
 - Kit Plummer
@@ -9,7 +9,7 @@ autorequire: pdftohtml
 bindir: bin
 cert_chain: []
-date: 2009-12-13 00:00:00 -07:00
+date: 2009-12-14 00:00:00 -07:00
 default_executable:
 dependencies: []