pdftohtmlr 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/MIT-LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2009 kitplummer@gmail.com
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
data/README.textile ADDED
@@ -0,0 +1,20 @@
1
+ h1. pdftohtmlr
2
+
3
+ Wrapper around the command line tool pdftohtml which converts PDF to HTML, go figure.
4
+
5
+ This gem was inspired by the MiniMagick gem - which does the same thing for ImageMagick (thanks Corey).
6
+
7
+ h1. requirements
8
+
9
+ Just pdftohtml and Ruby (1.8.6+ as far as I know).
10
+
11
+ h1. using
12
+ <pre>
13
+ <code>
14
+ file = PDFToHTMLR::PdfFile.new([Path to Source PDF],
15
+ [Target File (not implemented yet)],
16
+ [user password],
17
+ [owner password])
18
+ doc = file.convert
19
+ </code>
20
+ </pre>
data/Rakefile ADDED
@@ -0,0 +1,68 @@
1
+ require 'rake'
2
+ require 'rake/testtask'
3
+ require 'rake/rdoctask'
4
+ require 'rake/packagetask'
5
+ require 'rake/gempackagetask'
6
+
7
+ $:.unshift(File.dirname(__FILE__) + "/lib")
8
+ require 'pdftohtmlr'
9
+
10
# Build, test, documentation and packaging tasks for the pdftohtmlr gem.
PKG_NAME = 'pdftohtmlr'
PKG_VERSION = PDFToHTMLR::VERSION
PKG_FILE_NAME = "#{PKG_NAME}-#{PKG_VERSION}"

desc 'Default: run unit tests.'
task :default => :test

desc "Clean generated files"
task :clean do
  rm FileList['test/output/*.png']
  rm_rf 'pkg'
  rm_rf 'doc'
  # The rdoc task below writes to 'rdoc', so remove that directory as well.
  rm_rf 'rdoc'
end

desc 'Test the pdftohtmlr gem.'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = true
end

desc 'Generate documentation for the pdftohtmlr gem.'
Rake::RDocTask.new(:rdoc) do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'pdftohtmlr'
  # Each flag must be its own element; a single "a b" string is handed to
  # rdoc as one (unknown) argument.
  rdoc.options << '--line-numbers' << '--inline-source'
  # The packaged readme is README.textile; a bare 'README' matches nothing.
  rdoc.rdoc_files.include('README.textile')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Gem specification used to create the compressed packages.
spec = Gem::Specification.new do |s|
  s.platform = Gem::Platform::RUBY
  s.name = PKG_NAME
  s.summary = "Convert PDF documents to HTML."
  s.description = %q{Uses command-line pdftohtml tools to convert PDF files to HTML.}
  s.version = PKG_VERSION

  s.author = "Kit Plummer"
  s.email = "kitplummer@gmail.com"
  s.rubyforge_project = PKG_NAME
  s.homepage = "http://github.com/kitplummer/pdftohtmlr"

  s.has_rdoc = true
  s.requirements << 'none'
  s.require_path = 'lib'
  # Must name a file that exists under lib/ (lib/pdftohtmlr.rb).
  s.autorequire = 'pdftohtmlr'

  # Package the top-level files plus lib/ and test/, leaving out Subversion
  # metadata and any PNG files found under test/.
  s.files = [ "Rakefile", "README.textile", "MIT-LICENSE" ]
  s.files += Dir.glob("lib/**/*").reject { |item| item.include?(".svn") }
  s.files += Dir.glob("test/**/*").reject { |item| item.include?(".svn") || item.include?(".png") }
end

Rake::GemPackageTask.new(spec) do |p|
  p.gem_spec = spec
  p.need_tar = false
  p.need_zip = true
end
data/lib/pdftohtmlr.rb ADDED
@@ -0,0 +1,53 @@
1
+ require 'rubygems'
2
+ require 'open3'
3
+
4
# Thin wrapper around the +pdftohtml+ command-line tool.
module PDFToHTMLR
  # Raised when the source file does not exist or pdftohtml reports a failure.
  class PDFToHTMLRError < RuntimeError; end

  VERSION = '0.2.0'

  # A PDF document on disk that can be converted to HTML.
  class PdfFile
    attr_reader :path, :target, :user_pwd, :owner_pwd

    # input_path::  path to the source PDF; it must exist.
    # target_path:: stored as #target but not used by #convert.
    # user_pwd::    user password for the PDF, or nil.
    # owner_pwd::   owner password for the PDF, or nil. Only used when
    #               user_pwd is nil.
    #
    # Raises PDFToHTMLRError when input_path does not exist.
    def initialize(input_path, target_path, user_pwd, owner_pwd)
      @path = input_path
      @target = target_path
      @user_pwd = user_pwd
      @owner_pwd = owner_pwd

      # check to make sure file is legit
      raise PDFToHTMLRError, "invalid file path" unless File.exist?(@path)
    end

    # Runs pdftohtml on the source file and returns what it wrote to stdout
    # as a String.
    #
    # Raises PDFToHTMLRError when pdftohtml writes anything to stderr or
    # exits with a non-zero status.
    def convert
      # capture3 closes the child's stdin and drains stdout and stderr
      # together, so a chatty stderr cannot stall the read of stdout.
      output, errors, status = Open3.capture3(*command)

      raise PDFToHTMLRError, errors.to_s unless errors.empty?
      raise PDFToHTMLRError, "pdftohtml failed (#{status})" unless status.success?

      output
    end

    private

    # Builds the argument vector for pdftohtml. Passing separate arguments
    # (instead of one interpolated string) bypasses the shell, so paths and
    # passwords containing spaces or shell metacharacters are passed through
    # verbatim.
    def command
      argv = ['pdftohtml', '-stdout']
      if @user_pwd
        argv << '-upw' << @user_pwd.to_s
      elsif @owner_pwd
        argv << '-opw' << @owner_pwd.to_s
      end
      argv << @path.to_s
    end
  end
end
data/test/pdftohtmlr_test.rb ADDED
@@ -0,0 +1,52 @@
1
+ require 'test/unit'
2
+ require File.join(File.dirname(__FILE__), '../lib/pdftohtmlr')
3
+
4
# Unit tests for PDFToHTMLR::PdfFile. They use the fixture PDFs located next
# to this file and shell out to pdftohtml to obtain the expected output.
class PdfFileTest < Test::Unit::TestCase
  include PDFToHTMLR

  CURRENT_DIR = File.dirname(File.expand_path(__FILE__)) + "/"
  TEST_PDF_PATH = CURRENT_DIR + "test.pdf"
  TEST_PWD_PDF_PATH = CURRENT_DIR + "test_pw.pdf"
  TEST_BAD_PATH = "blah.pdf"
  # An existing file that is not a PDF.
  TEST_NON_PDF = CURRENT_DIR + "pdftohtmlr_test.rb"

  def test_pdffile_new
    assert PdfFile.new(TEST_PDF_PATH, ".", nil, nil)
  end

  # Converting a file that exists but is not a PDF must raise.
  def test_invalid_pdffile
    assert_raise PDFToHTMLRError do
      PdfFile.new(TEST_NON_PDF, ".", nil, nil).convert
    end
  end

  # A path that does not exist is rejected by the constructor.
  def test_bad_pdffile_new
    assert_raise PDFToHTMLRError do
      PdfFile.new(TEST_BAD_PATH, ".", nil, nil)
    end
  end

  def test_string_from_pdffile
    html = PdfFile.new(TEST_PDF_PATH, ".", nil, nil).convert
    assert_instance_of String, html
    assert_equal `pdftohtml -stdout #{TEST_PDF_PATH}`, html
  end

  def test_invalid_pwd_pdffile
    assert_raise PDFToHTMLRError do
      PdfFile.new(TEST_PWD_PDF_PATH, ".", "blah", nil).convert
    end
  end

  def test_valid_pwd_pdffile
    html = PdfFile.new(TEST_PWD_PDF_PATH, ".", "user", nil).convert
    assert_instance_of String, html
    assert_equal `pdftohtml -stdout -upw user #{TEST_PWD_PDF_PATH}`, html
  end
end
data/test/test.pdf ADDED
Binary file
data/test/test_pw.pdf ADDED
Binary file
metadata ADDED
@@ -0,0 +1,61 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pdftohtmlr
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Kit Plummer
8
+ autorequire: pdftohtml
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-12-10 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: Uses command-line pdftohtml tools to convert PDF files to HTML.
17
+ email: kitplummer@gmail.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - Rakefile
26
+ - README.textile
27
+ - MIT-LICENSE
28
+ - lib/pdftohtmlr.rb
29
+ - test/pdftohtmlr_test.rb
30
+ - test/test.pdf
31
+ - test/test_pw.pdf
32
+ has_rdoc: true
33
+ homepage: http://github.com/kitplummer/pdftohtmlr
34
+ licenses: []
35
+
36
+ post_install_message:
37
+ rdoc_options: []
38
+
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ version:
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: "0"
52
+ version:
53
+ requirements:
54
+ - none
55
+ rubyforge_project: pdftohtmlr
56
+ rubygems_version: 1.3.5
57
+ signing_key:
58
+ specification_version: 3
59
+ summary: Convert PDF documents to HTML.
60
+ test_files: []
61
+