RubyGems - pdftohtmlr - Versions diffs - 0.2.0 - Mend

pdftohtmlr 0.2.0

Files changed (8) hide show

data/MIT-LICENSE ADDED Viewed

@@ -0,0 +1,21 @@
+Copyright (c) 2009 kitplummer@gmail.com
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

data/README.textile ADDED Viewed

@@ -0,0 +1,20 @@
+h1. pdftohtmlr
+Wrapper around the command line tool pdftohtml which converts PDF to HTML, go figure.
+This gem was inspired by the MiniMagick gem - which does the same thing for ImageMagick (thanks Corey).
+h1. requirements
+The @pdftohtml@ command-line tool must be installed and on your PATH, plus Ruby (1.8.6+ as far as I know).
+h1. using
+<pre>
+<code>
+require 'pdftohtmlr'
+file = PDFToHTMLR::PdfFile.new([Path to Source PDF],
+                               [Target File (not implemented yet)],
+                               [user password],
+                               [owner password])
+doc = file.convert
+</code>
+</pre>

data/Rakefile ADDED Viewed

@@ -0,0 +1,68 @@
+require 'rake'
+require 'rake/testtask'
+require 'rake/rdoctask'
+require 'rake/packagetask'
+require 'rake/gempackagetask'
+$:.unshift(File.dirname(__FILE__) + "/lib")
+require 'pdftohtmlr'
# Package identity, reused by the gem specification further down.
PKG_NAME      = 'pdftohtmlr'
PKG_VERSION   = PDFToHTMLR::VERSION
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"

desc 'Default: run unit tests.'
task :default => :test

desc "Clean generated files"
task :clean do
  rm FileList['test/output/*.png']
  rm_rf 'pkg'
  rm_rf 'doc'
  # The :rdoc task in this Rakefile sets rdoc_dir to 'rdoc', so that
  # directory has to be removed as well or generated docs survive a clean.
  rm_rf 'rdoc'
end

desc 'Test the pdftohtmlr gem.'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end
desc 'Generate documentation for the pdftohtmlr gem.'
Rake::RDocTask.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title    = 'pdftohtmlr'
  # Each flag must be its own element: a single string containing a space
  # is handed to rdoc as one (unrecognised) argument.
  rdoc.options << '--line-numbers' << '--inline-source'
  # The README shipped with the gem is README.textile, not README.
  rdoc.rdoc_files.include('README.textile')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
# Gem specification and the packaging task built from it.
spec = Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.name = PKG_NAME
  s.summary = "Convert PDF documents to HTML."
  s.description = %q{Uses command-line pdftohtml tools to convert PDF files to HTML.}
  s.version = PKG_VERSION
  s.author = "Kit Plummer"
  s.email = "kitplummer@gmail.com"
  s.rubyforge_project = PKG_NAME
  s.homepage = "http://github.com/kitplummer/pdftohtmlr"
  s.has_rdoc = true
  s.requirements << 'none'
  s.require_path = 'lib'
  # Must name the file under lib/ (lib/pdftohtmlr.rb); 'pdftohtml' does not
  # exist and could never be required.
  s.autorequire = 'pdftohtmlr'

  # Ship the top-level docs plus everything under lib/ and test/, skipping
  # Subversion bookkeeping and any PNG output left behind by test runs.
  lib_files  = Dir.glob('lib/**/*').reject { |item| item.include?('.svn') }
  test_files = Dir.glob('test/**/*').reject do |item|
    item.include?('.svn') || item.include?('.png')
  end
  s.files = ['Rakefile', 'README.textile', 'MIT-LICENSE'] + lib_files + test_files
end

# Builds pkg/<name>-<version>.gem and a .zip archive (no tarball).
Rake::GemPackageTask.new(spec) do |p|
  p.gem_spec = spec
  p.need_tar = false
  p.need_zip = true
end

data/lib/pdftohtmlr.rb ADDED Viewed

@@ -0,0 +1,53 @@
+require 'rubygems'
+require 'open3'
module PDFToHTMLR
  # Raised when the source file does not exist or when pdftohtml writes
  # anything to its standard error stream.
  class PDFToHTMLRError < RuntimeError; end

  VERSION = '0.2.0'

  # A PDF file on disk that can be converted to HTML by running the external
  # +pdftohtml+ command-line tool.
  class PdfFile
    attr_reader :path, :target, :user_pwd, :owner_pwd

    # input_path  - path to the source PDF; must exist.
    # target_path - stored and exposed via #target, but not used by #convert.
    # user_pwd    - user password, or nil.
    # owner_pwd   - owner password, or nil (only used when user_pwd is nil).
    #
    # Raises PDFToHTMLRError if input_path does not exist.
    def initialize(input_path, target_path, user_pwd, owner_pwd)
      @path = input_path
      @target = target_path
      @user_pwd = user_pwd
      @owner_pwd = owner_pwd

      raise PDFToHTMLRError, "invalid file path" unless File.exist?(@path)
    end

    # Runs pdftohtml on the file and returns the HTML it prints to stdout.
    #
    # Raises PDFToHTMLRError, carrying pdftohtml's stderr text, whenever the
    # tool writes anything at all to stderr.
    def convert
      output = ""
      errors = ""

      # Passing the command as separate arguments bypasses the shell, so
      # passwords and paths containing spaces, quotes or metacharacters are
      # delivered verbatim instead of being re-parsed (or executed).
      Open3.popen3(*build_command) do |stdin, stdout, stderr|
        # The tool reads nothing from stdin; close it straight away.
        stdin.close
        # Drain stderr concurrently: reading stdout to EOF first can block
        # forever if the child fills the stderr pipe in the meantime.
        err_reader = Thread.new { stderr.read }
        output = stdout.read
        errors = err_reader.value
      end

      raise PDFToHTMLRError, errors unless errors.empty?

      output
    end

    private

    # Builds the argument vector for pdftohtml. The user password takes
    # precedence over the owner password; at most one of them is passed.
    def build_command
      argv = ["pdftohtml", "-stdout"]
      if @user_pwd
        argv << "-upw" << @user_pwd.to_s
      elsif @owner_pwd
        argv << "-opw" << @owner_pwd.to_s
      end
      argv << @path.to_s
    end
  end
end

data/test/pdftohtmlr_test.rb ADDED Viewed

@@ -0,0 +1,52 @@
+require 'test/unit'
+require File.join(File.dirname(__FILE__), '../lib/pdftohtmlr')
# Exercises PdfFile against the fixture PDFs stored next to this file.
# The conversion tests need the pdftohtml binary to be available on PATH.
class PdfFileTest < Test::Unit::TestCase
  include PDFToHTMLR

  CURRENT_DIR = File.dirname(File.expand_path(__FILE__)) + "/"
  TEST_PDF_PATH = CURRENT_DIR + "test.pdf"
  TEST_PWD_PDF_PATH = CURRENT_DIR + "test_pw.pdf"
  TEST_BAD_PATH = "blah.pdf"
  TEST_NON_PDF = CURRENT_DIR + "pdftohtmlr_test.rb"

  def test_pdffile_new
    file = PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
    assert file
  end

  # A file that exists but is not a PDF passes the constructor and fails
  # only once pdftohtml is actually run on it.
  def test_invalid_pdffile
    e = assert_raise PDFToHTMLRError do
      PdfFile.new(TEST_NON_PDF, ".", nil, nil).convert
    end
    # Check the captured error instead of printing it into the test output.
    assert !e.message.empty?
  end

  def test_bad_pdffile_new
    assert_raise PDFToHTMLRError do
      PdfFile.new(TEST_BAD_PATH, ".", nil, nil)
    end
  end

  def test_string_from_pdffile
    html = PdfFile.new(TEST_PDF_PATH, ".", nil, nil).convert
    assert_instance_of String, html
    assert_equal `pdftohtml -stdout #{TEST_PDF_PATH}`, html
  end

  def test_invalid_pwd_pdffile
    assert_raise PDFToHTMLRError do
      PdfFile.new(TEST_PWD_PDF_PATH, ".", "blah", nil).convert
    end
  end

  def test_valid_pwd_pdffile
    html = PdfFile.new(TEST_PWD_PDF_PATH, ".", "user", nil).convert
    assert_instance_of String, html
    assert_equal `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`, html
  end
end

data/test/test.pdf ADDED Viewed

Binary file

data/test/test_pw.pdf ADDED Viewed

Binary file

metadata ADDED Viewed

@@ -0,0 +1,61 @@
+--- !ruby/object:Gem::Specification
+name: pdftohtmlr
+version: !ruby/object:Gem::Version
+  version: 0.2.0
+platform: ruby
+authors:
+- Kit Plummer
+autorequire: pdftohtml
+bindir: bin
+cert_chain: []
+date: 2009-12-10 00:00:00 -07:00
+default_executable:
+dependencies: []
+description: Uses command-line pdftohtml tools to convert PDF files to HTML.
+email: kitplummer@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- Rakefile
+- README.textile
+- MIT-LICENSE
+- lib/pdftohtmlr.rb
+- test/pdftohtmlr_test.rb
+- test/test.pdf
+- test/test_pw.pdf
+has_rdoc: true
+homepage: http://github.com/kitplummer/pdftohtmlr
+licenses: []
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+requirements:
+- none
+rubyforge_project: pdftohtmlr
+rubygems_version: 1.3.5
+signing_key:
+specification_version: 3
+summary: Convert PDF documents to HTML.
+test_files: []