RubyGems - pdftohtmlr - Versions diffs - 0.2.0 → 0.3.0 - Mend

pdftohtmlr 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

data/README.textile CHANGED Viewed

@@ -8,13 +8,22 @@ h1. requirements
 Just pdftohtml and Ruby (1.8.6+ as far as I know).
+h1. install
+"http://gemcutter.org/gems/pdftohtmlr":http://gemcutter.org/gems/pdftohtmlr
+<pre><code>gem install pdftohtmlr</code></pre>
 h1. using
-<pre>
-<code>
+<pre><code lang="ruby">require 'pdftohtmlr'
+require 'nokogiri'
 file = PdfFile.new([Path to Source PDF],
                    [Target File (not implemented yet)],
                    [user password],
                    [owner password])
-doc = file.convert
-</code>
-</pre>
+string = file.convert
+doc = file.convert_to_document()</code></pre>
+h1. license
+MIT

data/lib/pdftohtmlr.rb CHANGED Viewed

@@ -1,10 +1,26 @@
+# The library has a single method for converting PDF files into HTML. The
+# method currently takes in the source path, and either/both the user and owner
+# passwords set on the source PDF document.  The convert method returns the
+# HTML as a string for further manipulation or loading into a Document.
+#
+# Requires that pdftohtml be installed and on the path
+#
+# Author:: Kit Plummer (mailto:kitplummer@gmail.com)
+# Copyright:: Copyright (c) 2009 Kit Plummer
+# License:: MIT
 require 'rubygems'
 require 'open3'
+require 'nokogiri'
 module PDFToHTMLR
+  # Simple local error abstraction
   class PDFToHTMLRError < RuntimeError; end
-  VERSION = '0.2.0'
+  VERSION = '0.3.0'
+  # Provides facilities for converting PDFs to HTML from Ruby code.
   class PdfFile
     attr :path
     attr :target
@@ -24,6 +40,7 @@ module PDFToHTMLR
     end
+    # Convert the PDF document to HTML.  Returns a string
     def convert()
       errors = ""
       output = ""
@@ -49,5 +66,10 @@ module PDFToHTMLR
       end
     end
+    # Convert the PDF document to HTML.  Returns a Nokogiri::HTML::Document
+    def convert_to_document()
+      Nokogiri::HTML.parse(convert())
+    end
   end
 end

data/test/pdftohtmlr_test.rb CHANGED Viewed

@@ -20,7 +20,6 @@ class PdfFileTest < Test::Unit::TestCase
       file = PdfFile.new(TEST_NON_PDF, ".", nil, nil)
       file.convert
     end
-    puts e
   end
   def test_bad_pdffile_new
@@ -48,5 +47,15 @@ class PdfFileTest < Test::Unit::TestCase
     assert_equal `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`,
     file.convert()
   end
+  def test_return_document
+    file = PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
+    assert_equal "Nokogiri::HTML::Document",
+     file.convert_to_document().class.to_s
+    assert_equal Nokogiri::HTML.parse(
+        `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`
+      ).css('body').first.to_s,
+       file.convert_to_document().css('body').first.to_s
+  end
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: pdftohtmlr
 version: !ruby/object:Gem::Version
-  version: 0.2.0
+  version: 0.3.0
 platform: ruby
 authors:
 - Kit Plummer
@@ -9,7 +9,7 @@ autorequire: pdftohtml
 bindir: bin
 cert_chain: []
-date: 2009-12-10 00:00:00 -07:00
+date: 2009-12-13 00:00:00 -07:00
 default_executable:
 dependencies: []